1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 9 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 10 { 11 PCBDDCBenignMatMult_ctx ctx; 12 PetscErrorCode ierr; 13 PetscBool apply_right,apply_left,reset_x; 14 15 PetscFunctionBegin; 16 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 17 if (transpose) { 18 apply_right = ctx->apply_left; 19 apply_left = ctx->apply_right; 20 } else { 21 apply_right = ctx->apply_right; 22 apply_left = ctx->apply_left; 23 } 24 reset_x = PETSC_FALSE; 25 if (apply_right) { 26 const PetscScalar *ax; 27 PetscInt nl,i; 28 29 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 31 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 32 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 33 for (i=0;i<ctx->benign_n;i++) { 34 PetscScalar sum,val; 35 const PetscInt *idxs; 36 PetscInt nz,j; 37 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 38 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 39 sum = 0.; 40 if (ctx->apply_p0) { 41 val = ctx->work[idxs[nz-1]]; 42 for (j=0;j<nz-1;j++) { 43 sum += ctx->work[idxs[j]]; 44 ctx->work[idxs[j]] += val; 45 } 46 } else { 47 for (j=0;j<nz-1;j++) { 48 sum += ctx->work[idxs[j]]; 49 } 50 } 51 ctx->work[idxs[nz-1]] -= sum; 52 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 53 } 54 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 55 reset_x = PETSC_TRUE; 56 } 57 if (transpose) { 58 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 59 } else { 60 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 61 } 62 if (reset_x) { 63 ierr = VecResetArray(x);CHKERRQ(ierr); 64 } 65 if (apply_left) { 66 PetscScalar *ay; 67 PetscInt i; 68 69 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 70 for (i=0;i<ctx->benign_n;i++) { 71 PetscScalar sum,val; 72 const PetscInt *idxs; 73 PetscInt nz,j; 74 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 75 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 76 val = -ay[idxs[nz-1]]; 77 if (ctx->apply_p0) { 78 sum = 0.; 79 for (j=0;j<nz-1;j++) { 80 sum += ay[idxs[j]]; 81 ay[idxs[j]] += val; 82 } 83 ay[idxs[nz-1]] += sum; 84 } else { 85 for (j=0;j<nz-1;j++) { 86 ay[idxs[j]] += val; 87 } 88 ay[idxs[nz-1]] = 0.; 89 } 90 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 91 } 92 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 99 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 100 { 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 110 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 111 { 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCBenignShellMat" 121 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 122 { 123 PC_IS *pcis = (PC_IS*)pc->data; 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PCBDDCBenignMatMult_ctx ctx; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!restore) { 130 Mat A_IB,A_BI; 131 PetscScalar *work; 132 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 133 134 if (pcbddc->benign_original_mat) { 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 136 } 137 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 138 PetscFunctionReturn(0); 139 } 140 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 141 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 142 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 143 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 146 ierr = PetscNew(&ctx);CHKERRQ(ierr); 147 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 148 ctx->apply_left = PETSC_TRUE; 149 ctx->apply_right = PETSC_FALSE; 150 ctx->apply_p0 = PETSC_FALSE; 151 ctx->benign_n = pcbddc->benign_n; 152 if (reuse) { 153 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 154 ctx->free = PETSC_FALSE; 155 } else { /* TODO: could be optimized for successive solves */ 156 ISLocalToGlobalMapping N_to_D; 157 PetscInt i; 158 159 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 160 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 161 for (i=0;i<pcbddc->benign_n;i++) { 162 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 163 } 164 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 165 ctx->free = PETSC_TRUE; 166 } 167 ctx->A = pcis->A_IB; 168 ctx->work = work; 169 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 170 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 pcis->A_IB = A_IB; 173 174 /* A_BI as A_IB^T */ 175 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 176 pcbddc->benign_original_mat = pcis->A_BI; 177 pcis->A_BI = A_BI; 178 } else { 179 if (!pcbddc->benign_original_mat) { 180 PetscFunctionReturn(0); 181 } 182 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 183 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 184 pcis->A_IB = ctx->A; 185 ctx->A = NULL; 186 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 187 pcis->A_BI = pcbddc->benign_original_mat; 188 pcbddc->benign_original_mat = NULL; 189 if (ctx->free) { 190 PetscInt i; 191 for (i=0;i<ctx->benign_n;i++) { 192 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 193 } 194 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 195 } 196 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 197 ierr = PetscFree(ctx);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* used just in bddc debug mode */ 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignProject" 205 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 206 { 207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 209 Mat An; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 214 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 215 if (is1) { 216 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 217 ierr = MatDestroy(&An);CHKERRQ(ierr); 218 } else { 219 *B = An; 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* TODO: add reuse flag */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSeqAIJCompress" 227 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 228 { 229 Mat Bt; 230 PetscScalar *a,*bdata; 231 const PetscInt *ii,*ij; 232 PetscInt m,n,i,nnz,*bii,*bij; 233 PetscBool flg_row; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 238 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 239 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 240 nnz = n; 241 for (i=0;i<ii[n];i++) { 242 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 243 } 244 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 247 nnz = 0; 248 bii[0] = 0; 249 for (i=0;i<n;i++) { 250 PetscInt j; 251 for (j=ii[i];j<ii[i+1];j++) { 252 PetscScalar entry = a[j]; 253 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 254 bij[nnz] = ij[j]; 255 bdata[nnz] = entry; 256 nnz++; 257 } 258 } 259 bii[i+1] = nnz; 260 } 261 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 262 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 263 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 264 { 265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 } 269 *B = Bt; 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatDetectDisconnectedComponents" 275 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 276 { 277 Mat B; 278 IS is_dummy,*cc_n; 279 ISLocalToGlobalMapping l2gmap_dummy; 280 PCBDDCGraph graph; 281 PetscInt i,n; 282 PetscInt *xadj,*adjncy; 283 PetscInt *xadj_filtered,*adjncy_filtered; 284 PetscBool flg_row,isseqaij; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 if (!A->rmap->N || !A->cmap->N) { 289 *ncc = 0; 290 *cc = NULL; 291 PetscFunctionReturn(0); 292 } 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 294 if (!isseqaij && filter) { 295 PetscBool isseqdense; 296 297 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 298 if (!isseqdense) { 299 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 300 } else { /* TODO: rectangular case and LDA */ 301 PetscScalar *array; 302 PetscReal chop=1.e-6; 303 304 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 305 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 306 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 307 for (i=0;i<n;i++) { 308 PetscInt j; 309 for (j=i+1;j<n;j++) { 310 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 311 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 312 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 313 } 314 } 315 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 316 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 317 } 318 } else { 319 B = A; 320 } 321 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 322 323 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 324 if (filter) { 325 PetscScalar *data; 326 PetscInt j,cum; 327 328 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 329 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 330 cum = 0; 331 for (i=0;i<n;i++) { 332 PetscInt t; 333 334 for (j=xadj[i];j<xadj[i+1];j++) { 335 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 336 continue; 337 } 338 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 339 } 340 t = xadj_filtered[i]; 341 xadj_filtered[i] = cum; 342 cum += t; 343 } 344 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 345 } else { 346 xadj_filtered = NULL; 347 adjncy_filtered = NULL; 348 } 349 350 /* compute local connected components using PCBDDCGraph */ 351 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 353 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 355 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 357 if (xadj_filtered) { 358 graph->xadj = xadj_filtered; 359 graph->adjncy = adjncy_filtered; 360 } else { 361 graph->xadj = xadj; 362 graph->adjncy = adjncy; 363 } 364 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 366 /* partial clean up */ 367 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 368 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 369 if (A != B) { 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 } 372 373 /* get back data */ 374 if (ncc) *ncc = graph->ncc; 375 if (cc) { 376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 377 for (i=0;i<graph->ncc;i++) { 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 379 } 380 *cc = cc_n; 381 } 382 /* clean up graph */ 383 graph->xadj = 0; 384 graph->adjncy = 0; 385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCBenignCheck" 391 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 392 { 393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 394 PC_IS* pcis = (PC_IS*)(pc->data); 395 IS dirIS = NULL; 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 401 if (zerodiag) { 402 Mat A; 403 Vec vec3_N; 404 PetscScalar *vals; 405 const PetscInt *idxs; 406 PetscInt nz,*count; 407 408 /* p0 */ 409 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 410 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 413 for (i=0;i<nz;i++) vals[i] = 1.; 414 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 417 /* v_I */ 418 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 419 for (i=0;i<nz;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 422 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 423 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 424 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 426 if (dirIS) { 427 PetscInt n; 428 429 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 430 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 431 for (i=0;i<n;i++) vals[i] = 0.; 432 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 438 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 439 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 440 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 441 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 442 if (PetscAbsScalar(vals[0]) > 1.e-1) { 443 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 444 } 445 ierr = PetscFree(vals);CHKERRQ(ierr); 446 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 447 448 /* there should not be any pressure dofs lying on the interface */ 449 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 450 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 451 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 452 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 453 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 for (i=0;i<nz;i++) { 455 if (count[idxs[i]]) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 457 } 458 } 459 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 460 ierr = PetscFree(count);CHKERRQ(ierr); 461 } 462 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 463 464 /* check PCBDDCBenignGetOrSetP0 */ 465 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 468 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 469 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 470 for (i=0;i<pcbddc->benign_n;i++) { 471 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 472 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 473 } 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 480 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 481 { 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 IS pressures,zerodiag,*zerodiag_subs; 484 PetscInt nz,n; 485 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 491 for (n=0;n<pcbddc->benign_n;n++) { 492 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 495 pcbddc->benign_n = 0; 496 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 497 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 498 Checks if all the pressure dofs in each subdomain have a zero diagonal 499 If not, a change of basis on pressures is not needed 500 since the local Schur complements are SPD 501 */ 502 has_null_pressures = PETSC_TRUE; 503 have_null = PETSC_TRUE; 504 if (pcbddc->n_ISForDofsLocal) { 505 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 506 507 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 513 if (!sorted) { 514 ierr = ISSort(pressures);CHKERRQ(ierr); 515 } 516 } else { 517 pressures = NULL; 518 } 519 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 520 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 521 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 522 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 523 if (!sorted) { 524 ierr = ISSort(zerodiag);CHKERRQ(ierr); 525 } 526 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 527 if (!nz) { 528 if (n) have_null = PETSC_FALSE; 529 has_null_pressures = PETSC_FALSE; 530 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 531 } 532 recompute_zerodiag = PETSC_FALSE; 533 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 534 zerodiag_subs = NULL; 535 pcbddc->benign_n = 0; 536 if (has_null_pressures) { 537 IS *subs; 538 PetscInt nsubs,i; 539 540 subs = pcbddc->local_subs; 541 nsubs = pcbddc->n_local_subs; 542 if (nsubs > 1) { 543 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 544 for (i=0;i<nsubs;i++) { 545 ISLocalToGlobalMapping l2g; 546 IS t_zerodiag_subs; 547 PetscInt nl; 548 549 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 550 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 551 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 552 if (nl) { 553 PetscBool valid = PETSC_TRUE; 554 555 if (pressures) { 556 IS t_pressure_subs; 557 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 558 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 559 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 560 } 561 if (valid) { 562 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 563 pcbddc->benign_n++; 564 } else { 565 recompute_zerodiag = PETSC_TRUE; 566 } 567 } 568 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 569 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 570 } 571 } else { /* there's just one subdomain (or zero if they have not been detected */ 572 PetscBool valid = PETSC_TRUE; 573 if (pressures) { 574 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 575 } 576 if (valid) { 577 pcbddc->benign_n = 1; 578 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 579 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 580 zerodiag_subs[0] = zerodiag; 581 } 582 } 583 } 584 585 if (!pcbddc->benign_n) { 586 PetscInt n; 587 588 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 589 recompute_zerodiag = PETSC_FALSE; 590 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 591 if (n) { 592 has_null_pressures = PETSC_FALSE; 593 have_null = PETSC_FALSE; 594 } 595 } 596 597 /* final check for null pressures */ 598 if (zerodiag && pressures) { 599 PetscInt nz,np; 600 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 601 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 602 if (nz != np) have_null = PETSC_FALSE; 603 } 604 605 if (recompute_zerodiag) { 606 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 607 if (pcbddc->benign_n == 1) { 608 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 609 zerodiag = zerodiag_subs[0]; 610 } else { 611 PetscInt i,nzn,*new_idxs; 612 613 nzn = 0; 614 for (i=0;i<pcbddc->benign_n;i++) { 615 PetscInt ns; 616 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 617 nzn += ns; 618 } 619 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 620 nzn = 0; 621 for (i=0;i<pcbddc->benign_n;i++) { 622 PetscInt ns,*idxs; 623 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 624 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 625 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 626 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 627 nzn += ns; 628 } 629 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 630 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 631 } 632 have_null = PETSC_FALSE; 633 } 634 635 /* no-net-flux */ 636 if (pcbddc->benign_compute_nonetflux) { 637 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 638 MatNullSpace near_null_space; 639 ISLocalToGlobalMapping rmap; 640 Vec quad_vec; 641 PetscScalar *pvals; 642 PetscInt i,np,*dummyins; 643 644 /* create vector to hold quadrature weights */ 645 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 646 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 647 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 648 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 649 650 /* compute B^{(i)T} * 1_p */ 651 np = 0; 652 if (zerodiag) { 653 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 654 } else if (pressures) { 655 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 656 } 657 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 658 for (i=0;i<np;i++) pvals[i] = 1.; 659 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 660 if (np) { 661 const PetscInt *pidxs; 662 663 if (zerodiag) { 664 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 669 if (zerodiag) { 670 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 671 } else if (pressures) { 672 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 673 } 674 } 675 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 676 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 677 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 678 ierr = PetscFree(pvals);CHKERRQ(ierr); 679 680 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 681 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 682 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 683 for (i=0;i<matis->sf->nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 684 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 685 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 686 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 687 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 688 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 689 for (i=0;i<matis->sf->nleaves;i++) { 690 if (PetscGlobalRank != matis->sf_leafdata[i]) { 691 pvals[i] = 0.; 692 } 693 } 694 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 695 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 696 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 697 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 698 ierr = PetscFree(dummyins);CHKERRQ(ierr); 699 700 /* assembly quadrature vec and attach near null space to pmat */ 701 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 702 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 703 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 704 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 705 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 706 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 707 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 708 } 709 710 /* change of basis and p0 dofs */ 711 if (has_null_pressures) { 712 IS zerodiagc; 713 const PetscInt *idxs,*idxsc; 714 PetscInt i,s,*nnz; 715 716 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 717 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 718 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 719 /* local change of basis for pressures */ 720 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 721 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 722 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 723 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 724 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 725 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 726 for (i=0;i<pcbddc->benign_n;i++) { 727 PetscInt nzs,j; 728 729 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 730 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 731 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 732 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 733 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 734 } 735 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 736 ierr = PetscFree(nnz);CHKERRQ(ierr); 737 /* set identity on velocities */ 738 for (i=0;i<n-nz;i++) { 739 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 740 } 741 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 742 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 743 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 744 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 745 /* set change on pressures */ 746 for (s=0;s<pcbddc->benign_n;s++) { 747 PetscScalar *array; 748 PetscInt nzs; 749 750 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 751 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 752 for (i=0;i<nzs-1;i++) { 753 PetscScalar vals[2]; 754 PetscInt cols[2]; 755 756 cols[0] = idxs[i]; 757 cols[1] = idxs[nzs-1]; 758 vals[0] = 1.; 759 vals[1] = 1.; 760 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 761 } 762 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 763 for (i=0;i<nzs-1;i++) array[i] = -1.; 764 array[nzs-1] = 1.; 765 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 766 /* store local idxs for p0 */ 767 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 768 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 769 ierr = PetscFree(array);CHKERRQ(ierr); 770 } 771 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 772 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 /* project if needed */ 774 if (pcbddc->benign_change_explicit) { 775 Mat M; 776 777 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 780 ierr = MatDestroy(&M);CHKERRQ(ierr); 781 } 782 /* store global idxs for p0 */ 783 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 784 } 785 pcbddc->benign_zerodiag_subs = zerodiag_subs; 786 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 787 788 /* determines if the coarse solver will be singular or not */ 789 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 790 *zerodiaglocal = zerodiag; 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 796 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 797 { 798 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 799 PetscScalar *array; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 if (!pcbddc->benign_sf) { 804 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 805 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 806 } 807 if (get) { 808 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 809 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 810 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 811 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 } else { 813 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 814 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 815 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 816 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 817 } 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 823 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 824 { 825 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 826 PetscErrorCode ierr; 827 828 PetscFunctionBegin; 829 /* TODO: add error checking 830 - avoid nested pop (or push) calls. 831 - cannot push before pop. 832 - cannot call this if pcbddc->local_mat is NULL 833 */ 834 if (!pcbddc->benign_n) { 835 PetscFunctionReturn(0); 836 } 837 if (pop) { 838 if (pcbddc->benign_change_explicit) { 839 IS is_p0; 840 MatReuse reuse; 841 842 /* extract B_0 */ 843 reuse = MAT_INITIAL_MATRIX; 844 if (pcbddc->benign_B0) { 845 reuse = MAT_REUSE_MATRIX; 846 } 847 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 848 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 849 /* remove rows and cols from local problem */ 850 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 851 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 852 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 854 } else { 855 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 856 PetscScalar *vals; 857 PetscInt i,n,*idxs_ins; 858 859 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 860 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 861 if (!pcbddc->benign_B0) { 862 PetscInt *nnz; 863 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 864 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 865 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 866 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 867 for (i=0;i<pcbddc->benign_n;i++) { 868 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 869 nnz[i] = n - nnz[i]; 870 } 871 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 872 ierr = PetscFree(nnz);CHKERRQ(ierr); 873 } 874 875 for (i=0;i<pcbddc->benign_n;i++) { 876 PetscScalar *array; 877 PetscInt *idxs,j,nz,cum; 878 879 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 881 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 882 for (j=0;j<nz;j++) vals[j] = 1.; 883 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 884 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 885 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 886 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 887 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 888 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 889 cum = 0; 890 for (j=0;j<n;j++) { 891 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 892 vals[cum] = array[j]; 893 idxs_ins[cum] = j; 894 cum++; 895 } 896 } 897 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 898 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 899 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 900 } 901 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 902 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 903 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 904 } 905 } else { /* push */ 906 if (pcbddc->benign_change_explicit) { 907 PetscInt i; 908 909 for (i=0;i<pcbddc->benign_n;i++) { 910 PetscScalar *B0_vals; 911 PetscInt *B0_cols,B0_ncol; 912 913 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 914 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 915 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 916 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 917 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 918 } 919 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921 } else { 922 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCAdaptiveSelection" 930 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 931 { 932 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 933 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 934 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 935 PetscBLASInt *B_iwork,*B_ifail; 936 PetscScalar *work,lwork; 937 PetscScalar *St,*S,*eigv; 938 PetscScalar *Sarray,*Starray; 939 PetscReal *eigs,thresh; 940 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 941 PetscBool allocated_S_St; 942 #if defined(PETSC_USE_COMPLEX) 943 PetscReal *rwork; 944 #endif 945 PetscErrorCode ierr; 946 947 PetscFunctionBegin; 948 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 949 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 950 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 955 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 } 957 958 if (pcbddc->dbg_flag) { 959 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 960 } 961 962 /* max size of subsets */ 963 mss = 0; 964 for (i=0;i<sub_schurs->n_subs;i++) { 965 PetscInt subset_size; 966 967 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 968 mss = PetscMax(mss,subset_size); 969 } 970 971 /* min/max and threshold */ 972 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 973 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 974 nmax = PetscMax(nmin,nmax); 975 allocated_S_St = PETSC_FALSE; 976 if (nmin) { 977 allocated_S_St = PETSC_TRUE; 978 } 979 980 /* allocate lapack workspace */ 981 cum = cum2 = 0; 982 maxneigs = 0; 983 for (i=0;i<sub_schurs->n_subs;i++) { 984 PetscInt n,subset_size; 985 986 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 987 n = PetscMin(subset_size,nmax); 988 cum += subset_size; 989 cum2 += subset_size*n; 990 maxneigs = PetscMax(maxneigs,n); 991 } 992 if (mss) { 993 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 994 PetscBLASInt B_itype = 1; 995 PetscBLASInt B_N = mss; 996 PetscReal zero = 0.0; 997 PetscReal eps = 0.0; /* dlamch? */ 998 999 B_lwork = -1; 1000 S = NULL; 1001 St = NULL; 1002 eigs = NULL; 1003 eigv = NULL; 1004 B_iwork = NULL; 1005 B_ifail = NULL; 1006 #if defined(PETSC_USE_COMPLEX) 1007 rwork = NULL; 1008 #endif 1009 thresh = 1.0; 1010 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1011 #if defined(PETSC_USE_COMPLEX) 1012 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1013 #else 1014 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1015 #endif 1016 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1018 } else { 1019 /* TODO */ 1020 } 1021 } else { 1022 lwork = 0; 1023 } 1024 1025 nv = 0; 1026 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1027 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1028 } 1029 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1030 if (allocated_S_St) { 1031 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1032 } 1033 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1034 #if defined(PETSC_USE_COMPLEX) 1035 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1036 #endif 1037 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1039 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1040 nv+cum,&pcbddc->adaptive_constraints_idxs, 1041 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1042 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1043 1044 maxneigs = 0; 1045 cum = cumarray = 0; 1046 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1047 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1048 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1049 const PetscInt *idxs; 1050 1051 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1052 for (cum=0;cum<nv;cum++) { 1053 pcbddc->adaptive_constraints_n[cum] = 1; 1054 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1055 pcbddc->adaptive_constraints_data[cum] = 1.0; 1056 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1057 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1058 } 1059 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1060 } 1061 1062 if (mss) { /* multilevel */ 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1064 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1065 } 1066 1067 thresh = pcbddc->adaptive_threshold; 1068 for (i=0;i<sub_schurs->n_subs;i++) { 1069 const PetscInt *idxs; 1070 PetscReal upper,lower; 1071 PetscInt j,subset_size,eigs_start = 0; 1072 PetscBLASInt B_N; 1073 PetscBool same_data = PETSC_FALSE; 1074 1075 if (pcbddc->use_deluxe_scaling) { 1076 upper = PETSC_MAX_REAL; 1077 lower = thresh; 1078 } else { 1079 upper = 1./thresh; 1080 lower = 0.; 1081 } 1082 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1083 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1084 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1085 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1086 if (sub_schurs->is_hermitian) { 1087 PetscInt j,k; 1088 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1089 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1090 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1091 } 1092 for (j=0;j<subset_size;j++) { 1093 for (k=j;k<subset_size;k++) { 1094 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1095 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1096 } 1097 } 1098 } else { 1099 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 } else { 1103 S = Sarray + cumarray; 1104 St = Starray + cumarray; 1105 } 1106 /* see if we can save some work */ 1107 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1108 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1109 } 1110 1111 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1112 B_neigs = 0; 1113 } else { 1114 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1115 PetscBLASInt B_itype = 1; 1116 PetscBLASInt B_IL, B_IU; 1117 PetscReal eps = -1.0; /* dlamch? */ 1118 PetscInt nmin_s; 1119 PetscBool compute_range = PETSC_FALSE; 1120 1121 if (pcbddc->dbg_flag) { 1122 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1123 } 1124 1125 compute_range = PETSC_FALSE; 1126 if (thresh > 1.+PETSC_SMALL && !same_data) { 1127 compute_range = PETSC_TRUE; 1128 } 1129 1130 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1131 if (compute_range) { 1132 1133 /* ask for eigenvalues larger than thresh */ 1134 #if defined(PETSC_USE_COMPLEX) 1135 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1136 #else 1137 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1138 #endif 1139 } else if (!same_data) { 1140 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1141 B_IL = 1; 1142 #if defined(PETSC_USE_COMPLEX) 1143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1144 #else 1145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1146 #endif 1147 } else { /* same_data is true, so get the adaptive function requested by the user */ 1148 PetscInt k; 1149 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1150 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1151 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1152 nmin = nmax; 1153 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1154 for (k=0;k<nmax;k++) { 1155 eigs[k] = 1./PETSC_SMALL; 1156 eigv[k*(subset_size+1)] = 1.0; 1157 } 1158 } 1159 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1160 if (B_ierr) { 1161 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1162 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1163 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1164 } 1165 1166 if (B_neigs > nmax) { 1167 if (pcbddc->dbg_flag) { 1168 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1169 } 1170 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1171 B_neigs = nmax; 1172 } 1173 1174 nmin_s = PetscMin(nmin,B_N); 1175 if (B_neigs < nmin_s) { 1176 PetscBLASInt B_neigs2; 1177 1178 if (pcbddc->use_deluxe_scaling) { 1179 B_IL = B_N - nmin_s + 1; 1180 B_IU = B_N - B_neigs; 1181 } else { 1182 B_IL = B_neigs + 1; 1183 B_IU = nmin_s; 1184 } 1185 if (pcbddc->dbg_flag) { 1186 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1187 } 1188 if (sub_schurs->is_hermitian) { 1189 PetscInt j; 1190 for (j=0;j<subset_size;j++) { 1191 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1192 } 1193 for (j=0;j<subset_size;j++) { 1194 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1195 } 1196 } else { 1197 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1198 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1199 } 1200 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1201 #if defined(PETSC_USE_COMPLEX) 1202 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1203 #else 1204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1205 #endif 1206 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1207 B_neigs += B_neigs2; 1208 } 1209 if (B_ierr) { 1210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1213 } 1214 if (pcbddc->dbg_flag) { 1215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1216 for (j=0;j<B_neigs;j++) { 1217 if (eigs[j] == 0.0) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1219 } else { 1220 if (pcbddc->use_deluxe_scaling) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 } else { 1229 /* TODO */ 1230 } 1231 } 1232 /* change the basis back to the original one */ 1233 if (sub_schurs->change) { 1234 Mat change,phi,phit; 1235 1236 if (pcbddc->dbg_flag > 1) { 1237 PetscInt ii; 1238 for (ii=0;ii<B_neigs;ii++) { 1239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1240 for (j=0;j<B_N;j++) { 1241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1242 } 1243 } 1244 } 1245 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1246 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1247 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1248 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1249 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1250 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1251 } 1252 maxneigs = PetscMax(B_neigs,maxneigs); 1253 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1254 if (B_neigs) { 1255 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1256 1257 if (pcbddc->dbg_flag > 1) { 1258 PetscInt ii; 1259 for (ii=0;ii<B_neigs;ii++) { 1260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1261 for (j=0;j<B_N;j++) { 1262 #if defined(PETSC_USE_COMPLEX) 1263 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1264 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1268 #endif 1269 } 1270 } 1271 } 1272 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1273 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1274 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1275 cum++; 1276 } 1277 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1278 /* shift for next computation */ 1279 cumarray += subset_size*subset_size; 1280 } 1281 if (pcbddc->dbg_flag) { 1282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1283 } 1284 1285 if (mss) { 1286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1287 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1288 /* destroy matrices (junk) */ 1289 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1290 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1291 } 1292 if (allocated_S_St) { 1293 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1296 #if defined(PETSC_USE_COMPLEX) 1297 ierr = PetscFree(rwork);CHKERRQ(ierr); 1298 #endif 1299 if (pcbddc->dbg_flag) { 1300 PetscInt maxneigs_r; 1301 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1302 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "PCBDDCSetUpSolvers" 1309 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1310 { 1311 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1312 PetscScalar *coarse_submat_vals; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1317 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1318 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1319 1320 /* Setup local neumann solver ksp_R */ 1321 /* PCBDDCSetUpLocalScatters should be called first! */ 1322 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1323 1324 /* Change global null space passed in by the user if change of basis has been requested */ 1325 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1326 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1327 } 1328 1329 /* 1330 Setup local correction and local part of coarse basis. 1331 Gives back the dense local part of the coarse matrix in column major ordering 1332 */ 1333 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1334 1335 /* Compute total number of coarse nodes and setup coarse solver */ 1336 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1337 1338 /* free */ 1339 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "PCBDDCResetCustomization" 1345 PetscErrorCode PCBDDCResetCustomization(PC pc) 1346 { 1347 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1353 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1354 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1357 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1358 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1359 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1360 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1361 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1362 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "PCBDDCResetTopography" 1368 PetscErrorCode PCBDDCResetTopography(PC pc) 1369 { 1370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1371 PetscInt i; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1376 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1377 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1378 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1380 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1381 for (i=0;i<pcbddc->n_local_subs;i++) { 1382 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1385 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "PCBDDCResetSolvers" 1391 PetscErrorCode PCBDDCResetSolvers(PC pc) 1392 { 1393 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1398 if (pcbddc->coarse_phi_B) { 1399 PetscScalar *array; 1400 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1401 ierr = PetscFree(array);CHKERRQ(ierr); 1402 } 1403 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1404 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1405 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1406 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1407 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1408 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1422 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1423 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1424 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1425 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1426 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1427 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1429 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1430 if (pcbddc->benign_zerodiag_subs) { 1431 PetscInt i; 1432 for (i=0;i<pcbddc->benign_n;i++) { 1433 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1436 } 1437 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1443 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1444 { 1445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1446 PC_IS *pcis = (PC_IS*)pc->data; 1447 VecType impVecType; 1448 PetscInt n_constraints,n_R,old_size; 1449 PetscErrorCode ierr; 1450 1451 PetscFunctionBegin; 1452 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1453 /* get sizes */ 1454 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1455 n_R = pcis->n - pcbddc->n_vertices; 1456 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1457 /* local work vectors (try to avoid unneeded work)*/ 1458 /* R nodes */ 1459 old_size = -1; 1460 if (pcbddc->vec1_R) { 1461 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1462 } 1463 if (n_R != old_size) { 1464 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1465 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1467 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1468 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1469 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1470 } 1471 /* local primal dofs */ 1472 old_size = -1; 1473 if (pcbddc->vec1_P) { 1474 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1475 } 1476 if (pcbddc->local_primal_size != old_size) { 1477 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1479 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1480 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1481 } 1482 /* local explicit constraints */ 1483 old_size = -1; 1484 if (pcbddc->vec1_C) { 1485 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1486 } 1487 if (n_constraints && n_constraints != old_size) { 1488 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1489 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1490 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1491 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "PCBDDCSetUpCorrection" 1498 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1499 { 1500 PetscErrorCode ierr; 1501 /* pointers to pcis and pcbddc */ 1502 PC_IS* pcis = (PC_IS*)pc->data; 1503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1504 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1505 /* submatrices of local problem */ 1506 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1507 /* submatrices of local coarse problem */ 1508 Mat S_VV,S_CV,S_VC,S_CC; 1509 /* working matrices */ 1510 Mat C_CR; 1511 /* additional working stuff */ 1512 PC pc_R; 1513 Mat F; 1514 Vec dummy_vec; 1515 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1516 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1517 PetscScalar *work; 1518 PetscInt *idx_V_B; 1519 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1520 PetscInt i,n_R,n_D,n_B; 1521 1522 /* some shortcuts to scalars */ 1523 PetscScalar one=1.0,m_one=-1.0; 1524 1525 PetscFunctionBegin; 1526 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1527 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1528 } 1529 1530 /* Set Non-overlapping dimensions */ 1531 n_vertices = pcbddc->n_vertices; 1532 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1533 n_B = pcis->n_B; 1534 n_D = pcis->n - n_B; 1535 n_R = pcis->n - n_vertices; 1536 1537 /* vertices in boundary numbering */ 1538 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1539 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1540 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1541 1542 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1543 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1544 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1545 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1547 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1548 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1549 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1550 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1551 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1552 1553 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1554 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1555 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1558 lda_rhs = n_R; 1559 need_benign_correction = PETSC_FALSE; 1560 if (isLU || isILU || isCHOL) { 1561 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1562 } else if (sub_schurs->reuse_solver) { 1563 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1564 MatFactorType type; 1565 1566 F = reuse_solver->F; 1567 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1568 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1569 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1570 need_benign_correction = !!reuse_solver->benign_n; 1571 } else { 1572 F = NULL; 1573 } 1574 1575 /* allocate workspace */ 1576 n = 0; 1577 if (n_constraints) { 1578 n += lda_rhs*n_constraints; 1579 } 1580 if (n_vertices) { 1581 n = PetscMax(2*lda_rhs*n_vertices,n); 1582 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1583 } 1584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1585 1586 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1587 dummy_vec = NULL; 1588 if (need_benign_correction && lda_rhs != n_R && F) { 1589 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1590 } 1591 1592 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1593 if (n_constraints) { 1594 Mat M1,M2,M3,C_B; 1595 IS is_aux; 1596 PetscScalar *array,*array2; 1597 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1600 1601 /* Extract constraints on R nodes: C_{CR} */ 1602 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1605 1606 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1607 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1608 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1609 for (i=0;i<n_constraints;i++) { 1610 const PetscScalar *row_cmat_values; 1611 const PetscInt *row_cmat_indices; 1612 PetscInt size_of_constraint,j; 1613 1614 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1615 for (j=0;j<size_of_constraint;j++) { 1616 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1617 } 1618 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1619 } 1620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1621 if (F) { 1622 Mat B; 1623 1624 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1625 if (need_benign_correction) { 1626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1627 1628 /* rhs is already zero on interior dofs, no need to change the rhs */ 1629 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1630 } 1631 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1632 if (need_benign_correction) { 1633 PetscScalar *marr; 1634 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1635 1636 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1637 if (lda_rhs != n_R) { 1638 for (i=0;i<n_constraints;i++) { 1639 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1640 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1641 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1642 } 1643 } else { 1644 for (i=0;i<n_constraints;i++) { 1645 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1646 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1647 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1648 } 1649 } 1650 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1651 } 1652 ierr = MatDestroy(&B);CHKERRQ(ierr); 1653 } else { 1654 PetscScalar *marr; 1655 1656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1657 for (i=0;i<n_constraints;i++) { 1658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1663 } 1664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1665 } 1666 if (!pcbddc->switch_static) { 1667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1670 for (i=0;i<n_constraints;i++) { 1671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1677 } 1678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1680 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1681 } else { 1682 if (lda_rhs != n_R) { 1683 IS dummy; 1684 1685 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1686 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1687 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1690 pcbddc->local_auxmat2 = local_auxmat2_R; 1691 } 1692 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1693 } 1694 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1695 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1696 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1697 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1698 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1699 if (isCHOL) { 1700 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1701 } else { 1702 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1703 } 1704 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1705 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1706 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1707 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1708 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1709 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1710 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1712 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1713 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1714 } 1715 1716 /* Get submatrices from subdomain matrix */ 1717 if (n_vertices) { 1718 IS is_aux; 1719 1720 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1721 IS tis; 1722 1723 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1724 ierr = ISSort(tis);CHKERRQ(ierr); 1725 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1726 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1727 } else { 1728 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 } 1730 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1732 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1733 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1734 } 1735 1736 /* Matrix of coarse basis functions (local) */ 1737 if (pcbddc->coarse_phi_B) { 1738 PetscInt on_B,on_primal,on_D=n_D; 1739 if (pcbddc->coarse_phi_D) { 1740 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1741 } 1742 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1743 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1744 PetscScalar *marray; 1745 1746 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1747 ierr = PetscFree(marray);CHKERRQ(ierr); 1748 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1749 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1750 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1752 } 1753 } 1754 1755 if (!pcbddc->coarse_phi_B) { 1756 PetscScalar *marray; 1757 1758 n = n_B*pcbddc->local_primal_size; 1759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1760 n += n_D*pcbddc->local_primal_size; 1761 } 1762 if (!pcbddc->symmetric_primal) { 1763 n *= 2; 1764 } 1765 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1767 n = n_B*pcbddc->local_primal_size; 1768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1770 n += n_D*pcbddc->local_primal_size; 1771 } 1772 if (!pcbddc->symmetric_primal) { 1773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1775 n = n_B*pcbddc->local_primal_size; 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1777 } 1778 } else { 1779 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1780 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1784 } 1785 } 1786 } 1787 1788 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1789 p0_lidx_I = NULL; 1790 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1791 const PetscInt *idxs; 1792 1793 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1795 for (i=0;i<pcbddc->benign_n;i++) { 1796 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1797 } 1798 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1799 } 1800 1801 /* vertices */ 1802 if (n_vertices) { 1803 1804 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1805 1806 if (n_R) { 1807 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1808 PetscBLASInt B_N,B_one = 1; 1809 PetscScalar *x,*y; 1810 PetscBool isseqaij; 1811 1812 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 ISLocalToGlobalMapping RtoN; 1815 IS is_p0; 1816 PetscInt *idxs_p0,n; 1817 1818 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1819 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1820 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1821 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1822 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1823 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1824 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1825 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1826 } 1827 1828 if (lda_rhs == n_R) { 1829 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1830 } else { 1831 PetscScalar *av,*array; 1832 const PetscInt *xadj,*adjncy; 1833 PetscInt n; 1834 PetscBool flg_row; 1835 1836 array = work+lda_rhs*n_vertices; 1837 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1838 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1839 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1840 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1841 for (i=0;i<n;i++) { 1842 PetscInt j; 1843 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1844 } 1845 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1848 } 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1850 if (need_benign_correction) { 1851 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1852 PetscScalar *marr; 1853 1854 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1855 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1856 1857 | 0 0 0 | (V) 1858 L = | 0 0 -1 | (P-p0) 1859 | 0 0 -1 | (p0) 1860 1861 */ 1862 for (i=0;i<reuse_solver->benign_n;i++) { 1863 const PetscScalar *vals; 1864 const PetscInt *idxs,*idxs_zero; 1865 PetscInt n,j,nz; 1866 1867 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1868 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1869 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1870 for (j=0;j<n;j++) { 1871 PetscScalar val = vals[j]; 1872 PetscInt k,col = idxs[j]; 1873 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1874 } 1875 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1876 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1877 } 1878 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1879 } 1880 if (F) { 1881 /* need to correct the rhs */ 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 PetscScalar *marr; 1885 1886 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1887 if (lda_rhs != n_R) { 1888 for (i=0;i<n_vertices;i++) { 1889 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1890 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1891 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1892 } 1893 } else { 1894 for (i=0;i<n_vertices;i++) { 1895 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 } 1899 } 1900 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1901 } 1902 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1903 /* need to correct the solution */ 1904 if (need_benign_correction) { 1905 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1906 PetscScalar *marr; 1907 1908 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1909 if (lda_rhs != n_R) { 1910 for (i=0;i<n_vertices;i++) { 1911 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1912 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1913 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1914 } 1915 } else { 1916 for (i=0;i<n_vertices;i++) { 1917 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1918 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1920 } 1921 } 1922 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1923 } 1924 } else { 1925 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1926 for (i=0;i<n_vertices;i++) { 1927 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1928 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1931 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1932 } 1933 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1934 } 1935 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1936 /* S_VV and S_CV */ 1937 if (n_constraints) { 1938 Mat B; 1939 1940 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1941 for (i=0;i<n_vertices;i++) { 1942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1944 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1945 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1946 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1950 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1951 ierr = MatDestroy(&B);CHKERRQ(ierr); 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1954 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1956 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1957 ierr = MatDestroy(&B);CHKERRQ(ierr); 1958 } 1959 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1960 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1961 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1962 } 1963 if (lda_rhs != n_R) { 1964 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1966 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1967 } 1968 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1969 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1970 if (need_benign_correction) { 1971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1972 PetscScalar *marr,*sums; 1973 1974 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1975 ierr = MatDenseGetArray(S_VVt,&marr); 1976 for (i=0;i<reuse_solver->benign_n;i++) { 1977 const PetscScalar *vals; 1978 const PetscInt *idxs,*idxs_zero; 1979 PetscInt n,j,nz; 1980 1981 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1982 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1983 for (j=0;j<n_vertices;j++) { 1984 PetscInt k; 1985 sums[j] = 0.; 1986 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1987 } 1988 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1989 for (j=0;j<n;j++) { 1990 PetscScalar val = vals[j]; 1991 PetscInt k; 1992 for (k=0;k<n_vertices;k++) { 1993 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1994 } 1995 } 1996 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1997 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1998 } 1999 ierr = PetscFree(sums);CHKERRQ(ierr); 2000 ierr = MatDenseRestoreArray(S_VVt,&marr); 2001 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2002 } 2003 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2005 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2006 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2007 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2008 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2010 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2011 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2012 } else { 2013 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 } 2015 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2016 2017 /* coarse basis functions */ 2018 for (i=0;i<n_vertices;i++) { 2019 PetscScalar *y; 2020 2021 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2022 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2023 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 y[n_B*i+idx_V_B[i]] = 1.0; 2027 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2028 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2029 2030 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2031 PetscInt j; 2032 2033 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2034 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2035 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2036 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2037 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2038 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2039 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2040 } 2041 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2042 } 2043 /* if n_R == 0 the object is not destroyed */ 2044 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2045 } 2046 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2047 2048 if (n_constraints) { 2049 Mat B; 2050 2051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2052 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2053 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2054 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2055 if (n_vertices) { 2056 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2057 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2058 } else { 2059 Mat S_VCt; 2060 2061 if (lda_rhs != n_R) { 2062 ierr = MatDestroy(&B);CHKERRQ(ierr); 2063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2064 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2065 } 2066 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2067 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2068 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2069 } 2070 } 2071 ierr = MatDestroy(&B);CHKERRQ(ierr); 2072 /* coarse basis functions */ 2073 for (i=0;i<n_constraints;i++) { 2074 PetscScalar *y; 2075 2076 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2078 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2084 PetscInt j; 2085 2086 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2087 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2088 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2091 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2092 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2093 } 2094 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2095 } 2096 } 2097 if (n_constraints) { 2098 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2101 2102 /* coarse matrix entries relative to B_0 */ 2103 if (pcbddc->benign_n) { 2104 Mat B0_B,B0_BPHI; 2105 IS is_dummy; 2106 PetscScalar *data; 2107 PetscInt j; 2108 2109 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2110 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2111 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2112 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2113 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2114 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2115 for (j=0;j<pcbddc->benign_n;j++) { 2116 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2117 for (i=0;i<pcbddc->local_primal_size;i++) { 2118 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2119 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2120 } 2121 } 2122 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2123 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2124 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2125 } 2126 2127 /* compute other basis functions for non-symmetric problems */ 2128 if (!pcbddc->symmetric_primal) { 2129 Mat B_V=NULL,B_C=NULL; 2130 PetscScalar *marray; 2131 2132 if (n_constraints) { 2133 Mat S_CCT,C_CRT; 2134 2135 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2136 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2137 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2138 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2139 if (n_vertices) { 2140 Mat S_VCT; 2141 2142 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2143 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2144 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2145 } 2146 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2147 } 2148 if (n_vertices && n_R) { 2149 PetscScalar *av,*marray; 2150 const PetscInt *xadj,*adjncy; 2151 PetscInt n; 2152 PetscBool flg_row; 2153 2154 /* B_V = B_V - A_VR^T */ 2155 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2156 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2157 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2159 for (i=0;i<n;i++) { 2160 PetscInt j; 2161 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2162 } 2163 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2164 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2165 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2166 } 2167 2168 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2169 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2170 for (i=0;i<n_vertices;i++) { 2171 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2172 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2173 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2174 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2175 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2176 } 2177 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2178 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2179 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2180 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2181 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2182 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2183 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2184 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2185 } 2186 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2187 /* coarse basis functions */ 2188 for (i=0;i<pcbddc->local_primal_size;i++) { 2189 PetscScalar *y; 2190 2191 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2192 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2193 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2194 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 if (i<n_vertices) { 2197 y[n_B*i+idx_V_B[i]] = 1.0; 2198 } 2199 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2200 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2201 2202 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2203 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2204 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2205 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2206 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2207 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2208 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2209 } 2210 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2211 } 2212 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2213 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2214 } 2215 /* free memory */ 2216 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2218 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2219 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2221 ierr = PetscFree(work);CHKERRQ(ierr); 2222 if (n_vertices) { 2223 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2224 } 2225 if (n_constraints) { 2226 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2227 } 2228 /* Checking coarse_sub_mat and coarse basis functios */ 2229 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2230 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2231 if (pcbddc->dbg_flag) { 2232 Mat coarse_sub_mat; 2233 Mat AUXMAT,TM1,TM2,TM3,TM4; 2234 Mat coarse_phi_D,coarse_phi_B; 2235 Mat coarse_psi_D,coarse_psi_B; 2236 Mat A_II,A_BB,A_IB,A_BI; 2237 Mat C_B,CPHI; 2238 IS is_dummy; 2239 Vec mones; 2240 MatType checkmattype=MATSEQAIJ; 2241 PetscReal real_value; 2242 2243 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2244 Mat A; 2245 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2247 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2250 ierr = MatDestroy(&A);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2253 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2254 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2255 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2256 } 2257 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2258 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2259 if (!pcbddc->symmetric_primal) { 2260 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2262 } 2263 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2264 2265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2268 if (!pcbddc->symmetric_primal) { 2269 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2271 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 } else { 2282 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2283 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2284 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2285 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 } 2291 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2292 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2293 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2294 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2295 if (pcbddc->benign_n) { 2296 Mat B0_B,B0_BPHI; 2297 PetscScalar *data,*data2; 2298 PetscInt j; 2299 2300 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2301 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2302 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2303 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2304 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2306 for (j=0;j<pcbddc->benign_n;j++) { 2307 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2308 for (i=0;i<pcbddc->local_primal_size;i++) { 2309 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2310 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2311 } 2312 } 2313 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2314 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2315 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2316 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2317 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2318 } 2319 #if 0 2320 { 2321 PetscViewer viewer; 2322 char filename[256]; 2323 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2324 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2325 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2326 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2327 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2328 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2329 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2330 if (save_change) { 2331 Mat phi_B; 2332 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2333 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2334 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2335 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2336 } else { 2337 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2338 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2339 } 2340 if (pcbddc->coarse_phi_D) { 2341 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2342 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2343 } 2344 if (pcbddc->coarse_psi_B) { 2345 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2346 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2347 } 2348 if (pcbddc->coarse_psi_D) { 2349 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2350 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2351 } 2352 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2353 } 2354 #endif 2355 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2356 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2357 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2359 2360 /* check constraints */ 2361 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2362 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2363 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2364 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2365 } else { 2366 PetscScalar *data; 2367 Mat tmat; 2368 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2369 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2370 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2371 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2372 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2373 } 2374 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2375 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2376 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2377 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2379 if (!pcbddc->symmetric_primal) { 2380 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2381 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2382 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2383 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2385 } 2386 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2387 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2388 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2389 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2391 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2392 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2393 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2395 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2396 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2397 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2399 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2400 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2401 if (!pcbddc->symmetric_primal) { 2402 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2404 } 2405 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2406 } 2407 /* get back data */ 2408 *coarse_submat_vals_n = coarse_submat_vals; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2414 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2415 { 2416 Mat *work_mat; 2417 IS isrow_s,iscol_s; 2418 PetscBool rsorted,csorted; 2419 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2424 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2425 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2426 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2427 2428 if (!rsorted) { 2429 const PetscInt *idxs; 2430 PetscInt *idxs_sorted,i; 2431 2432 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2433 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2434 for (i=0;i<rsize;i++) { 2435 idxs_perm_r[i] = i; 2436 } 2437 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2438 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2439 for (i=0;i<rsize;i++) { 2440 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2441 } 2442 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2443 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2444 } else { 2445 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2446 isrow_s = isrow; 2447 } 2448 2449 if (!csorted) { 2450 if (isrow == iscol) { 2451 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2452 iscol_s = isrow_s; 2453 } else { 2454 const PetscInt *idxs; 2455 PetscInt *idxs_sorted,i; 2456 2457 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2458 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_perm_c[i] = i; 2461 } 2462 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2463 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2464 for (i=0;i<csize;i++) { 2465 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2466 } 2467 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2468 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2469 } 2470 } else { 2471 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2472 iscol_s = iscol; 2473 } 2474 2475 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2476 2477 if (!rsorted || !csorted) { 2478 Mat new_mat; 2479 IS is_perm_r,is_perm_c; 2480 2481 if (!rsorted) { 2482 PetscInt *idxs_r,i; 2483 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2484 for (i=0;i<rsize;i++) { 2485 idxs_r[idxs_perm_r[i]] = i; 2486 } 2487 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2488 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2489 } else { 2490 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2491 } 2492 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2493 2494 if (!csorted) { 2495 if (isrow_s == iscol_s) { 2496 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2497 is_perm_c = is_perm_r; 2498 } else { 2499 PetscInt *idxs_c,i; 2500 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2501 for (i=0;i<csize;i++) { 2502 idxs_c[idxs_perm_c[i]] = i; 2503 } 2504 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2505 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2506 } 2507 } else { 2508 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2511 2512 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2513 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2514 work_mat[0] = new_mat; 2515 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2516 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2517 } 2518 2519 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2520 *B = work_mat[0]; 2521 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2522 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2523 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2529 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2530 { 2531 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2532 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2533 Mat new_mat; 2534 IS is_local,is_global; 2535 PetscInt local_size; 2536 PetscBool isseqaij; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2541 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2542 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2543 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2544 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2545 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2546 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2547 2548 /* check */ 2549 if (pcbddc->dbg_flag) { 2550 Vec x,x_change; 2551 PetscReal error; 2552 2553 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2554 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2555 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2556 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2557 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2558 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2561 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2562 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2565 ierr = VecDestroy(&x);CHKERRQ(ierr); 2566 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2567 } 2568 2569 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2570 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2571 if (isseqaij) { 2572 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2573 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2574 } else { 2575 Mat work_mat; 2576 2577 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2578 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2579 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2580 } 2581 if (matis->A->symmetric_set) { 2582 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2583 #if !defined(PETSC_USE_COMPLEX) 2584 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2585 #endif 2586 } 2587 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2593 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2594 { 2595 PC_IS* pcis = (PC_IS*)(pc->data); 2596 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2597 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2598 PetscInt *idx_R_local=NULL; 2599 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2600 PetscInt vbs,bs; 2601 PetscBT bitmask=NULL; 2602 PetscErrorCode ierr; 2603 2604 PetscFunctionBegin; 2605 /* 2606 No need to setup local scatters if 2607 - primal space is unchanged 2608 AND 2609 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2610 AND 2611 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2612 */ 2613 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2614 PetscFunctionReturn(0); 2615 } 2616 /* destroy old objects */ 2617 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2618 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2619 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2620 /* Set Non-overlapping dimensions */ 2621 n_B = pcis->n_B; 2622 n_D = pcis->n - n_B; 2623 n_vertices = pcbddc->n_vertices; 2624 2625 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2626 2627 /* create auxiliary bitmask and allocate workspace */ 2628 if (!sub_schurs->reuse_solver) { 2629 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2630 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2631 for (i=0;i<n_vertices;i++) { 2632 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2633 } 2634 2635 for (i=0, n_R=0; i<pcis->n; i++) { 2636 if (!PetscBTLookup(bitmask,i)) { 2637 idx_R_local[n_R++] = i; 2638 } 2639 } 2640 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2641 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2642 2643 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2644 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2645 } 2646 2647 /* Block code */ 2648 vbs = 1; 2649 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2650 if (bs>1 && !(n_vertices%bs)) { 2651 PetscBool is_blocked = PETSC_TRUE; 2652 PetscInt *vary; 2653 if (!sub_schurs->reuse_solver) { 2654 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2655 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2656 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2657 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2658 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2659 for (i=0; i<pcis->n/bs; i++) { 2660 if (vary[i]!=0 && vary[i]!=bs) { 2661 is_blocked = PETSC_FALSE; 2662 break; 2663 } 2664 } 2665 ierr = PetscFree(vary);CHKERRQ(ierr); 2666 } else { 2667 /* Verify directly the R set */ 2668 for (i=0; i<n_R/bs; i++) { 2669 PetscInt j,node=idx_R_local[bs*i]; 2670 for (j=1; j<bs; j++) { 2671 if (node != idx_R_local[bs*i+j]-j) { 2672 is_blocked = PETSC_FALSE; 2673 break; 2674 } 2675 } 2676 } 2677 } 2678 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2679 vbs = bs; 2680 for (i=0;i<n_R/vbs;i++) { 2681 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2682 } 2683 } 2684 } 2685 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2686 if (sub_schurs->reuse_solver) { 2687 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2688 2689 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2690 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2691 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2692 reuse_solver->is_R = pcbddc->is_R_local; 2693 } else { 2694 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2695 } 2696 2697 /* print some info if requested */ 2698 if (pcbddc->dbg_flag) { 2699 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2705 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2706 } 2707 2708 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2709 if (!sub_schurs->reuse_solver) { 2710 IS is_aux1,is_aux2; 2711 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2712 2713 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2714 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2715 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2716 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2717 for (i=0; i<n_D; i++) { 2718 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2719 } 2720 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2721 for (i=0, j=0; i<n_R; i++) { 2722 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2723 aux_array1[j++] = i; 2724 } 2725 } 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2727 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2728 for (i=0, j=0; i<n_B; i++) { 2729 if (!PetscBTLookup(bitmask,is_indices[i])) { 2730 aux_array2[j++] = i; 2731 } 2732 } 2733 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2734 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2735 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2737 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2738 2739 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2740 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2741 for (i=0, j=0; i<n_R; i++) { 2742 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2743 aux_array1[j++] = i; 2744 } 2745 } 2746 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2747 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2748 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2749 } 2750 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2751 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2752 } else { 2753 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2754 IS tis; 2755 PetscInt schur_size; 2756 2757 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2758 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2759 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2760 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2761 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2762 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2763 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2764 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2765 } 2766 } 2767 PetscFunctionReturn(0); 2768 } 2769 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2773 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2774 { 2775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2776 PC_IS *pcis = (PC_IS*)pc->data; 2777 PC pc_temp; 2778 Mat A_RR; 2779 MatReuse reuse; 2780 PetscScalar m_one = -1.0; 2781 PetscReal value; 2782 PetscInt n_D,n_R; 2783 PetscBool use_exact,use_exact_reduced,issbaij; 2784 PetscErrorCode ierr; 2785 /* prefixes stuff */ 2786 char dir_prefix[256],neu_prefix[256],str_level[16]; 2787 size_t len; 2788 2789 PetscFunctionBegin; 2790 2791 /* compute prefixes */ 2792 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2793 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2794 if (!pcbddc->current_level) { 2795 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2797 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2798 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2799 } else { 2800 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2801 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2802 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2803 len -= 15; /* remove "pc_bddc_coarse_" */ 2804 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2805 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2806 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2807 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2808 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2809 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2810 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2811 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2812 } 2813 2814 /* DIRICHLET PROBLEM */ 2815 if (dirichlet) { 2816 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2817 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2818 if (!sub_schurs->reuse_solver) { 2819 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2820 } 2821 if (pcbddc->dbg_flag) { 2822 Mat A_IIn; 2823 2824 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2825 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2826 pcis->A_II = A_IIn; 2827 } 2828 } 2829 if (pcbddc->local_mat->symmetric_set) { 2830 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2831 } 2832 /* Matrix for Dirichlet problem is pcis->A_II */ 2833 n_D = pcis->n - pcis->n_B; 2834 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2835 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2836 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2837 /* default */ 2838 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2839 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2840 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2841 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2842 if (issbaij) { 2843 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2844 } else { 2845 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2846 } 2847 /* Allow user's customization */ 2848 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2849 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2850 } 2851 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2852 if (sub_schurs->reuse_solver) { 2853 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2854 2855 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2856 } 2857 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2858 if (!n_D) { 2859 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2860 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2861 } 2862 /* Set Up KSP for Dirichlet problem of BDDC */ 2863 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2864 /* set ksp_D into pcis data */ 2865 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2866 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2867 pcis->ksp_D = pcbddc->ksp_D; 2868 } 2869 2870 /* NEUMANN PROBLEM */ 2871 A_RR = 0; 2872 if (neumann) { 2873 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2874 PetscInt ibs,mbs; 2875 PetscBool issbaij; 2876 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2877 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2878 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2879 if (pcbddc->ksp_R) { /* already created ksp */ 2880 PetscInt nn_R; 2881 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2882 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2883 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2884 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2885 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2886 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2887 reuse = MAT_INITIAL_MATRIX; 2888 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2889 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2890 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2891 reuse = MAT_INITIAL_MATRIX; 2892 } else { /* safe to reuse the matrix */ 2893 reuse = MAT_REUSE_MATRIX; 2894 } 2895 } 2896 /* last check */ 2897 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2898 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2899 reuse = MAT_INITIAL_MATRIX; 2900 } 2901 } else { /* first time, so we need to create the matrix */ 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2905 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2906 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2907 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2908 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2909 if (matis->A == pcbddc->local_mat) { 2910 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2911 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2912 } else { 2913 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2914 } 2915 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2916 if (matis->A == pcbddc->local_mat) { 2917 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2918 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2919 } else { 2920 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2921 } 2922 } 2923 /* extract A_RR */ 2924 if (sub_schurs->reuse_solver) { 2925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2926 2927 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2928 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2929 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2930 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2931 } else { 2932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2933 } 2934 } else { 2935 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2936 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2937 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2938 } 2939 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2940 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2941 } 2942 if (pcbddc->local_mat->symmetric_set) { 2943 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2944 } 2945 if (!pcbddc->ksp_R) { /* create object if not present */ 2946 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2947 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2948 /* default */ 2949 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2950 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2951 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2953 if (issbaij) { 2954 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2955 } else { 2956 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2957 } 2958 /* Allow user's customization */ 2959 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2960 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2961 } 2962 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2963 if (!n_R) { 2964 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2965 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2966 } 2967 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2968 /* Reuse solver if it is present */ 2969 if (sub_schurs->reuse_solver) { 2970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2971 2972 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2973 } 2974 /* Set Up KSP for Neumann problem of BDDC */ 2975 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2976 } 2977 2978 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2979 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2980 if (pcbddc->dbg_flag) { 2981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2982 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2984 } 2985 if (dirichlet) { /* Dirichlet */ 2986 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2987 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2988 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2989 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2990 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2991 /* need to be adapted? */ 2992 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2993 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2994 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2995 /* print info */ 2996 if (pcbddc->dbg_flag) { 2997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2999 } 3000 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3001 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3002 } 3003 } 3004 if (neumann) { /* Neumann */ 3005 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3006 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3007 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3008 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3009 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3010 /* need to be adapted? */ 3011 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3012 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3013 /* print info */ 3014 if (pcbddc->dbg_flag) { 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3017 } 3018 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3019 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3020 } 3021 } 3022 } 3023 /* free Neumann problem's matrix */ 3024 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3030 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3031 { 3032 PetscErrorCode ierr; 3033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3034 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3035 3036 PetscFunctionBegin; 3037 if (!sub_schurs->reuse_solver) { 3038 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3039 } 3040 if (!pcbddc->switch_static) { 3041 if (applytranspose && pcbddc->local_auxmat1) { 3042 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3043 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3044 } 3045 if (!sub_schurs->reuse_solver) { 3046 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 } else { 3049 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3050 3051 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3052 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 } 3054 } else { 3055 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3056 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3057 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 if (applytranspose && pcbddc->local_auxmat1) { 3060 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3061 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3064 } 3065 } 3066 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3067 if (applytranspose) { 3068 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3069 } else { 3070 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3071 } 3072 } else { 3073 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3074 3075 if (applytranspose) { 3076 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3077 } else { 3078 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3079 } 3080 } 3081 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3082 if (!pcbddc->switch_static) { 3083 if (!sub_schurs->reuse_solver) { 3084 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 } else { 3087 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3088 3089 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3091 } 3092 if (!applytranspose && pcbddc->local_auxmat1) { 3093 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3094 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3095 } 3096 } else { 3097 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 if (!applytranspose && pcbddc->local_auxmat1) { 3102 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3103 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3104 } 3105 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3108 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 } 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3116 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3117 { 3118 PetscErrorCode ierr; 3119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3120 PC_IS* pcis = (PC_IS*) (pc->data); 3121 const PetscScalar zero = 0.0; 3122 3123 PetscFunctionBegin; 3124 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3125 if (!pcbddc->benign_apply_coarse_only) { 3126 if (applytranspose) { 3127 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3128 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3129 } else { 3130 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } 3133 } else { 3134 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3135 } 3136 3137 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3138 if (pcbddc->benign_n) { 3139 PetscScalar *array; 3140 PetscInt j; 3141 3142 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3143 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3144 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3145 } 3146 3147 /* start communications from local primal nodes to rhs of coarse solver */ 3148 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3149 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3150 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 3152 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3153 /* TODO remove null space when doing multilevel */ 3154 if (pcbddc->coarse_ksp) { 3155 Mat coarse_mat; 3156 Vec rhs,sol; 3157 MatNullSpace nullsp; 3158 3159 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3160 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3161 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3162 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3163 if (nullsp) { 3164 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3165 } 3166 if (applytranspose) { 3167 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3168 } else { 3169 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3170 } 3171 if (nullsp) { 3172 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3173 } 3174 } 3175 3176 /* Local solution on R nodes */ 3177 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3178 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3179 } 3180 /* communications from coarse sol to local primal nodes */ 3181 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3183 3184 /* Sum contributions from the two levels */ 3185 if (!pcbddc->benign_apply_coarse_only) { 3186 if (applytranspose) { 3187 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3188 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3189 } else { 3190 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3191 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3192 } 3193 /* store p0 */ 3194 if (pcbddc->benign_n) { 3195 PetscScalar *array; 3196 PetscInt j; 3197 3198 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3199 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3200 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3201 } 3202 } else { /* expand the coarse solution */ 3203 if (applytranspose) { 3204 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3205 } else { 3206 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3207 } 3208 } 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3214 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3215 { 3216 PetscErrorCode ierr; 3217 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3218 PetscScalar *array; 3219 Vec from,to; 3220 3221 PetscFunctionBegin; 3222 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3223 from = pcbddc->coarse_vec; 3224 to = pcbddc->vec1_P; 3225 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3226 Vec tvec; 3227 3228 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3229 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3230 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3231 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3232 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3234 } 3235 } else { /* from local to global -> put data in coarse right hand side */ 3236 from = pcbddc->vec1_P; 3237 to = pcbddc->coarse_vec; 3238 } 3239 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3245 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3246 { 3247 PetscErrorCode ierr; 3248 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3249 PetscScalar *array; 3250 Vec from,to; 3251 3252 PetscFunctionBegin; 3253 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3254 from = pcbddc->coarse_vec; 3255 to = pcbddc->vec1_P; 3256 } else { /* from local to global -> put data in coarse right hand side */ 3257 from = pcbddc->vec1_P; 3258 to = pcbddc->coarse_vec; 3259 } 3260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3261 if (smode == SCATTER_FORWARD) { 3262 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3263 Vec tvec; 3264 3265 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3266 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3267 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3268 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3269 } 3270 } else { 3271 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3272 ierr = VecResetArray(from);CHKERRQ(ierr); 3273 } 3274 } 3275 PetscFunctionReturn(0); 3276 } 3277 3278 /* uncomment for testing purposes */ 3279 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3282 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3283 { 3284 PetscErrorCode ierr; 3285 PC_IS* pcis = (PC_IS*)(pc->data); 3286 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3287 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3288 /* one and zero */ 3289 PetscScalar one=1.0,zero=0.0; 3290 /* space to store constraints and their local indices */ 3291 PetscScalar *constraints_data; 3292 PetscInt *constraints_idxs,*constraints_idxs_B; 3293 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3294 PetscInt *constraints_n; 3295 /* iterators */ 3296 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3297 /* BLAS integers */ 3298 PetscBLASInt lwork,lierr; 3299 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3300 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3301 /* reuse */ 3302 PetscInt olocal_primal_size,olocal_primal_size_cc; 3303 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3304 /* change of basis */ 3305 PetscBool qr_needed; 3306 PetscBT change_basis,qr_needed_idx; 3307 /* auxiliary stuff */ 3308 PetscInt *nnz,*is_indices; 3309 PetscInt ncc; 3310 /* some quantities */ 3311 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3312 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3313 3314 PetscFunctionBegin; 3315 /* Destroy Mat objects computed previously */ 3316 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3317 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3318 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3319 /* save info on constraints from previous setup (if any) */ 3320 olocal_primal_size = pcbddc->local_primal_size; 3321 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3322 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3323 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3324 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3325 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3326 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3327 3328 if (!pcbddc->adaptive_selection) { 3329 IS ISForVertices,*ISForFaces,*ISForEdges; 3330 MatNullSpace nearnullsp; 3331 const Vec *nearnullvecs; 3332 Vec *localnearnullsp; 3333 PetscScalar *array; 3334 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3335 PetscBool nnsp_has_cnst; 3336 /* LAPACK working arrays for SVD or POD */ 3337 PetscBool skip_lapack,boolforchange; 3338 PetscScalar *work; 3339 PetscReal *singular_vals; 3340 #if defined(PETSC_USE_COMPLEX) 3341 PetscReal *rwork; 3342 #endif 3343 #if defined(PETSC_MISSING_LAPACK_GESVD) 3344 PetscScalar *temp_basis,*correlation_mat; 3345 #else 3346 PetscBLASInt dummy_int=1; 3347 PetscScalar dummy_scalar=1.; 3348 #endif 3349 3350 /* Get index sets for faces, edges and vertices from graph */ 3351 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3352 /* print some info */ 3353 if (pcbddc->dbg_flag) { 3354 PetscInt nv; 3355 3356 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3357 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3358 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3363 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3364 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3365 } 3366 3367 /* free unneeded index sets */ 3368 if (!pcbddc->use_vertices) { 3369 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3370 } 3371 if (!pcbddc->use_edges) { 3372 for (i=0;i<n_ISForEdges;i++) { 3373 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3376 n_ISForEdges = 0; 3377 } 3378 if (!pcbddc->use_faces) { 3379 for (i=0;i<n_ISForFaces;i++) { 3380 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3381 } 3382 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3383 n_ISForFaces = 0; 3384 } 3385 3386 #if defined(PETSC_USE_DEBUG) 3387 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3388 Also use_change_of_basis should be consistent among processors */ 3389 if (pcbddc->NullSpace) { 3390 PetscBool tbool[2],gbool[2]; 3391 3392 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3393 pcbddc->use_change_of_basis = PETSC_TRUE; 3394 if (!ISForEdges) { 3395 pcbddc->use_change_on_faces = PETSC_TRUE; 3396 } 3397 } 3398 tbool[0] = pcbddc->use_change_of_basis; 3399 tbool[1] = pcbddc->use_change_on_faces; 3400 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3401 pcbddc->use_change_of_basis = gbool[0]; 3402 pcbddc->use_change_on_faces = gbool[1]; 3403 } 3404 #endif 3405 3406 /* check if near null space is attached to global mat */ 3407 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3408 if (nearnullsp) { 3409 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3410 /* remove any stored info */ 3411 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3412 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3413 /* store information for BDDC solver reuse */ 3414 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3415 pcbddc->onearnullspace = nearnullsp; 3416 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3417 for (i=0;i<nnsp_size;i++) { 3418 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3419 } 3420 } else { /* if near null space is not provided BDDC uses constants by default */ 3421 nnsp_size = 0; 3422 nnsp_has_cnst = PETSC_TRUE; 3423 } 3424 /* get max number of constraints on a single cc */ 3425 max_constraints = nnsp_size; 3426 if (nnsp_has_cnst) max_constraints++; 3427 3428 /* 3429 Evaluate maximum storage size needed by the procedure 3430 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3431 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3432 There can be multiple constraints per connected component 3433 */ 3434 n_vertices = 0; 3435 if (ISForVertices) { 3436 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3437 } 3438 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3439 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3440 3441 total_counts = n_ISForFaces+n_ISForEdges; 3442 total_counts *= max_constraints; 3443 total_counts += n_vertices; 3444 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3445 3446 total_counts = 0; 3447 max_size_of_constraint = 0; 3448 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3449 IS used_is; 3450 if (i<n_ISForEdges) { 3451 used_is = ISForEdges[i]; 3452 } else { 3453 used_is = ISForFaces[i-n_ISForEdges]; 3454 } 3455 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3456 total_counts += j; 3457 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3458 } 3459 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3460 3461 /* get local part of global near null space vectors */ 3462 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3463 for (k=0;k<nnsp_size;k++) { 3464 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3465 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 } 3468 3469 /* whether or not to skip lapack calls */ 3470 skip_lapack = PETSC_TRUE; 3471 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3472 3473 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3474 if (!skip_lapack) { 3475 PetscScalar temp_work; 3476 3477 #if defined(PETSC_MISSING_LAPACK_GESVD) 3478 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3479 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3482 #if defined(PETSC_USE_COMPLEX) 3483 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3484 #endif 3485 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3486 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3487 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3488 lwork = -1; 3489 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3490 #if !defined(PETSC_USE_COMPLEX) 3491 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3492 #else 3493 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3494 #endif 3495 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3496 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3497 #else /* on missing GESVD */ 3498 /* SVD */ 3499 PetscInt max_n,min_n; 3500 max_n = max_size_of_constraint; 3501 min_n = max_constraints; 3502 if (max_size_of_constraint < max_constraints) { 3503 min_n = max_size_of_constraint; 3504 max_n = max_constraints; 3505 } 3506 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3507 #if defined(PETSC_USE_COMPLEX) 3508 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3509 #endif 3510 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3511 lwork = -1; 3512 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3513 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3516 #if !defined(PETSC_USE_COMPLEX) 3517 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3518 #else 3519 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3520 #endif 3521 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3522 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3523 #endif /* on missing GESVD */ 3524 /* Allocate optimal workspace */ 3525 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3526 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3527 } 3528 /* Now we can loop on constraining sets */ 3529 total_counts = 0; 3530 constraints_idxs_ptr[0] = 0; 3531 constraints_data_ptr[0] = 0; 3532 /* vertices */ 3533 if (n_vertices) { 3534 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3536 for (i=0;i<n_vertices;i++) { 3537 constraints_n[total_counts] = 1; 3538 constraints_data[total_counts] = 1.0; 3539 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3540 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3541 total_counts++; 3542 } 3543 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3544 n_vertices = total_counts; 3545 } 3546 3547 /* edges and faces */ 3548 total_counts_cc = total_counts; 3549 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3550 IS used_is; 3551 PetscBool idxs_copied = PETSC_FALSE; 3552 3553 if (ncc<n_ISForEdges) { 3554 used_is = ISForEdges[ncc]; 3555 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3556 } else { 3557 used_is = ISForFaces[ncc-n_ISForEdges]; 3558 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3559 } 3560 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3561 3562 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3563 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3564 /* change of basis should not be performed on local periodic nodes */ 3565 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3566 if (nnsp_has_cnst) { 3567 PetscScalar quad_value; 3568 3569 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3570 idxs_copied = PETSC_TRUE; 3571 3572 if (!pcbddc->use_nnsp_true) { 3573 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3574 } else { 3575 quad_value = 1.0; 3576 } 3577 for (j=0;j<size_of_constraint;j++) { 3578 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3579 } 3580 temp_constraints++; 3581 total_counts++; 3582 } 3583 for (k=0;k<nnsp_size;k++) { 3584 PetscReal real_value; 3585 PetscScalar *ptr_to_data; 3586 3587 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3588 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3589 for (j=0;j<size_of_constraint;j++) { 3590 ptr_to_data[j] = array[is_indices[j]]; 3591 } 3592 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3593 /* check if array is null on the connected component */ 3594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3595 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3596 if (real_value > 0.0) { /* keep indices and values */ 3597 temp_constraints++; 3598 total_counts++; 3599 if (!idxs_copied) { 3600 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3601 idxs_copied = PETSC_TRUE; 3602 } 3603 } 3604 } 3605 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3606 valid_constraints = temp_constraints; 3607 if (!pcbddc->use_nnsp_true && temp_constraints) { 3608 if (temp_constraints == 1) { /* just normalize the constraint */ 3609 PetscScalar norm,*ptr_to_data; 3610 3611 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3613 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3614 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3615 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3616 } else { /* perform SVD */ 3617 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3618 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3619 3620 #if defined(PETSC_MISSING_LAPACK_GESVD) 3621 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3622 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3623 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3624 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3625 from that computed using LAPACKgesvd 3626 -> This is due to a different computation of eigenvectors in LAPACKheev 3627 -> The quality of the POD-computed basis will be the same */ 3628 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3629 /* Store upper triangular part of correlation matrix */ 3630 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3631 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3632 for (j=0;j<temp_constraints;j++) { 3633 for (k=0;k<j+1;k++) { 3634 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3635 } 3636 } 3637 /* compute eigenvalues and eigenvectors of correlation matrix */ 3638 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3640 #if !defined(PETSC_USE_COMPLEX) 3641 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3642 #else 3643 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3644 #endif 3645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3646 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3647 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3648 j = 0; 3649 while (j < temp_constraints && singular_vals[j] < tol) j++; 3650 total_counts = total_counts-j; 3651 valid_constraints = temp_constraints-j; 3652 /* scale and copy POD basis into used quadrature memory */ 3653 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3654 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3655 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3659 if (j<temp_constraints) { 3660 PetscInt ii; 3661 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3663 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3664 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3665 for (k=0;k<temp_constraints-j;k++) { 3666 for (ii=0;ii<size_of_constraint;ii++) { 3667 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3668 } 3669 } 3670 } 3671 #else /* on missing GESVD */ 3672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3673 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3675 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3676 #if !defined(PETSC_USE_COMPLEX) 3677 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3678 #else 3679 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3680 #endif 3681 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3682 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3683 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3684 k = temp_constraints; 3685 if (k > size_of_constraint) k = size_of_constraint; 3686 j = 0; 3687 while (j < k && singular_vals[k-j-1] < tol) j++; 3688 valid_constraints = k-j; 3689 total_counts = total_counts-temp_constraints+valid_constraints; 3690 #endif /* on missing GESVD */ 3691 } 3692 } 3693 /* update pointers information */ 3694 if (valid_constraints) { 3695 constraints_n[total_counts_cc] = valid_constraints; 3696 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3697 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3698 /* set change_of_basis flag */ 3699 if (boolforchange) { 3700 PetscBTSet(change_basis,total_counts_cc); 3701 } 3702 total_counts_cc++; 3703 } 3704 } 3705 /* free workspace */ 3706 if (!skip_lapack) { 3707 ierr = PetscFree(work);CHKERRQ(ierr); 3708 #if defined(PETSC_USE_COMPLEX) 3709 ierr = PetscFree(rwork);CHKERRQ(ierr); 3710 #endif 3711 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3712 #if defined(PETSC_MISSING_LAPACK_GESVD) 3713 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3714 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3715 #endif 3716 } 3717 for (k=0;k<nnsp_size;k++) { 3718 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3721 /* free index sets of faces, edges and vertices */ 3722 for (i=0;i<n_ISForFaces;i++) { 3723 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3724 } 3725 if (n_ISForFaces) { 3726 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3727 } 3728 for (i=0;i<n_ISForEdges;i++) { 3729 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3730 } 3731 if (n_ISForEdges) { 3732 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3733 } 3734 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3735 } else { 3736 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3737 3738 total_counts = 0; 3739 n_vertices = 0; 3740 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3741 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3742 } 3743 max_constraints = 0; 3744 total_counts_cc = 0; 3745 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3746 total_counts += pcbddc->adaptive_constraints_n[i]; 3747 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3748 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3749 } 3750 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3751 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3752 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3753 constraints_data = pcbddc->adaptive_constraints_data; 3754 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3755 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3756 total_counts_cc = 0; 3757 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3758 if (pcbddc->adaptive_constraints_n[i]) { 3759 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3760 } 3761 } 3762 #if 0 3763 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3764 for (i=0;i<total_counts_cc;i++) { 3765 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3766 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3767 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3768 printf(" %d",constraints_idxs[j]); 3769 } 3770 printf("\n"); 3771 printf("number of cc: %d\n",constraints_n[i]); 3772 } 3773 for (i=0;i<n_vertices;i++) { 3774 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3775 } 3776 for (i=0;i<sub_schurs->n_subs;i++) { 3777 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3778 } 3779 #endif 3780 3781 max_size_of_constraint = 0; 3782 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3783 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3784 /* Change of basis */ 3785 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3786 if (pcbddc->use_change_of_basis) { 3787 for (i=0;i<sub_schurs->n_subs;i++) { 3788 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3789 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3790 } 3791 } 3792 } 3793 } 3794 pcbddc->local_primal_size = total_counts; 3795 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3796 3797 /* map constraints_idxs in boundary numbering */ 3798 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3799 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3800 3801 /* Create constraint matrix */ 3802 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3803 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3804 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3805 3806 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3807 /* determine if a QR strategy is needed for change of basis */ 3808 qr_needed = PETSC_FALSE; 3809 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3810 total_primal_vertices=0; 3811 pcbddc->local_primal_size_cc = 0; 3812 for (i=0;i<total_counts_cc;i++) { 3813 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3814 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3815 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3816 pcbddc->local_primal_size_cc += 1; 3817 } else if (PetscBTLookup(change_basis,i)) { 3818 for (k=0;k<constraints_n[i];k++) { 3819 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3820 } 3821 pcbddc->local_primal_size_cc += constraints_n[i]; 3822 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3823 PetscBTSet(qr_needed_idx,i); 3824 qr_needed = PETSC_TRUE; 3825 } 3826 } else { 3827 pcbddc->local_primal_size_cc += 1; 3828 } 3829 } 3830 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3831 pcbddc->n_vertices = total_primal_vertices; 3832 /* permute indices in order to have a sorted set of vertices */ 3833 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3834 3835 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3836 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3837 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3838 3839 /* nonzero structure of constraint matrix */ 3840 /* and get reference dof for local constraints */ 3841 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3842 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3843 3844 j = total_primal_vertices; 3845 total_counts = total_primal_vertices; 3846 cum = total_primal_vertices; 3847 for (i=n_vertices;i<total_counts_cc;i++) { 3848 if (!PetscBTLookup(change_basis,i)) { 3849 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3850 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3851 cum++; 3852 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3853 for (k=0;k<constraints_n[i];k++) { 3854 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3855 nnz[j+k] = size_of_constraint; 3856 } 3857 j += constraints_n[i]; 3858 } 3859 } 3860 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3861 ierr = PetscFree(nnz);CHKERRQ(ierr); 3862 3863 /* set values in constraint matrix */ 3864 for (i=0;i<total_primal_vertices;i++) { 3865 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3866 } 3867 total_counts = total_primal_vertices; 3868 for (i=n_vertices;i<total_counts_cc;i++) { 3869 if (!PetscBTLookup(change_basis,i)) { 3870 PetscInt *cols; 3871 3872 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3873 cols = constraints_idxs+constraints_idxs_ptr[i]; 3874 for (k=0;k<constraints_n[i];k++) { 3875 PetscInt row = total_counts+k; 3876 PetscScalar *vals; 3877 3878 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3879 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3880 } 3881 total_counts += constraints_n[i]; 3882 } 3883 } 3884 /* assembling */ 3885 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 3888 /* 3889 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3890 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3891 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3892 */ 3893 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3894 if (pcbddc->use_change_of_basis) { 3895 /* dual and primal dofs on a single cc */ 3896 PetscInt dual_dofs,primal_dofs; 3897 /* working stuff for GEQRF */ 3898 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3899 PetscBLASInt lqr_work; 3900 /* working stuff for UNGQR */ 3901 PetscScalar *gqr_work,lgqr_work_t; 3902 PetscBLASInt lgqr_work; 3903 /* working stuff for TRTRS */ 3904 PetscScalar *trs_rhs; 3905 PetscBLASInt Blas_NRHS; 3906 /* pointers for values insertion into change of basis matrix */ 3907 PetscInt *start_rows,*start_cols; 3908 PetscScalar *start_vals; 3909 /* working stuff for values insertion */ 3910 PetscBT is_primal; 3911 PetscInt *aux_primal_numbering_B; 3912 /* matrix sizes */ 3913 PetscInt global_size,local_size; 3914 /* temporary change of basis */ 3915 Mat localChangeOfBasisMatrix; 3916 /* extra space for debugging */ 3917 PetscScalar *dbg_work; 3918 3919 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3920 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3921 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3922 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3923 /* nonzeros for local mat */ 3924 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3925 if (!pcbddc->benign_change || pcbddc->fake_change) { 3926 for (i=0;i<pcis->n;i++) nnz[i]=1; 3927 } else { 3928 const PetscInt *ii; 3929 PetscInt n; 3930 PetscBool flg_row; 3931 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3932 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3933 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3934 } 3935 for (i=n_vertices;i<total_counts_cc;i++) { 3936 if (PetscBTLookup(change_basis,i)) { 3937 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3938 if (PetscBTLookup(qr_needed_idx,i)) { 3939 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3940 } else { 3941 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3942 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3943 } 3944 } 3945 } 3946 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3947 ierr = PetscFree(nnz);CHKERRQ(ierr); 3948 /* Set interior change in the matrix */ 3949 if (!pcbddc->benign_change || pcbddc->fake_change) { 3950 for (i=0;i<pcis->n;i++) { 3951 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3952 } 3953 } else { 3954 const PetscInt *ii,*jj; 3955 PetscScalar *aa; 3956 PetscInt n; 3957 PetscBool flg_row; 3958 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3959 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3960 for (i=0;i<n;i++) { 3961 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3962 } 3963 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3964 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3965 } 3966 3967 if (pcbddc->dbg_flag) { 3968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3970 } 3971 3972 3973 /* Now we loop on the constraints which need a change of basis */ 3974 /* 3975 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3976 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3977 3978 Basic blocks of change of basis matrix T computed by 3979 3980 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3981 3982 | 1 0 ... 0 s_1/S | 3983 | 0 1 ... 0 s_2/S | 3984 | ... | 3985 | 0 ... 1 s_{n-1}/S | 3986 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3987 3988 with S = \sum_{i=1}^n s_i^2 3989 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3990 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3991 3992 - QR decomposition of constraints otherwise 3993 */ 3994 if (qr_needed) { 3995 /* space to store Q */ 3996 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3997 /* first we issue queries for optimal work */ 3998 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3999 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4000 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4001 lqr_work = -1; 4002 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4003 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4004 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4005 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4006 lgqr_work = -1; 4007 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4008 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4009 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4011 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4012 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4013 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4014 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4015 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4016 /* array to store scaling factors for reflectors */ 4017 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4018 /* array to store rhs and solution of triangular solver */ 4019 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4020 /* allocating workspace for check */ 4021 if (pcbddc->dbg_flag) { 4022 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4023 } 4024 } 4025 /* array to store whether a node is primal or not */ 4026 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4027 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4028 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4029 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4030 for (i=0;i<total_primal_vertices;i++) { 4031 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4032 } 4033 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4034 4035 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4036 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4037 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4038 if (PetscBTLookup(change_basis,total_counts)) { 4039 /* get constraint info */ 4040 primal_dofs = constraints_n[total_counts]; 4041 dual_dofs = size_of_constraint-primal_dofs; 4042 4043 if (pcbddc->dbg_flag) { 4044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4045 } 4046 4047 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4048 4049 /* copy quadrature constraints for change of basis check */ 4050 if (pcbddc->dbg_flag) { 4051 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4052 } 4053 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4054 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4055 4056 /* compute QR decomposition of constraints */ 4057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4061 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4062 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4064 4065 /* explictly compute R^-T */ 4066 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4067 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4068 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4069 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4071 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4072 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4073 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4074 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4075 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4076 4077 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4082 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4083 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4084 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4086 4087 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4088 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4089 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4091 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4092 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4097 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4099 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4100 4101 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4102 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4103 /* insert cols for primal dofs */ 4104 for (j=0;j<primal_dofs;j++) { 4105 start_vals = &qr_basis[j*size_of_constraint]; 4106 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4107 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4108 } 4109 /* insert cols for dual dofs */ 4110 for (j=0,k=0;j<dual_dofs;k++) { 4111 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4112 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4113 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4114 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4115 j++; 4116 } 4117 } 4118 4119 /* check change of basis */ 4120 if (pcbddc->dbg_flag) { 4121 PetscInt ii,jj; 4122 PetscBool valid_qr=PETSC_TRUE; 4123 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4124 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4128 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4130 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4131 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4132 for (jj=0;jj<size_of_constraint;jj++) { 4133 for (ii=0;ii<primal_dofs;ii++) { 4134 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4135 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4136 } 4137 } 4138 if (!valid_qr) { 4139 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4140 for (jj=0;jj<size_of_constraint;jj++) { 4141 for (ii=0;ii<primal_dofs;ii++) { 4142 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4143 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4144 } 4145 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4146 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4147 } 4148 } 4149 } 4150 } else { 4151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4152 } 4153 } 4154 } else { /* simple transformation block */ 4155 PetscInt row,col; 4156 PetscScalar val,norm; 4157 4158 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4159 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4160 for (j=0;j<size_of_constraint;j++) { 4161 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4162 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4163 if (!PetscBTLookup(is_primal,row_B)) { 4164 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4165 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4166 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4167 } else { 4168 for (k=0;k<size_of_constraint;k++) { 4169 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4170 if (row != col) { 4171 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4172 } else { 4173 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4174 } 4175 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4176 } 4177 } 4178 } 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4181 } 4182 } 4183 } else { 4184 if (pcbddc->dbg_flag) { 4185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4186 } 4187 } 4188 } 4189 4190 /* free workspace */ 4191 if (qr_needed) { 4192 if (pcbddc->dbg_flag) { 4193 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4194 } 4195 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4196 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4197 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4198 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4199 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4200 } 4201 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4202 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4203 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 4205 /* assembling of global change of variable */ 4206 if (!pcbddc->fake_change) { 4207 Mat tmat; 4208 PetscInt bs; 4209 4210 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4211 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4212 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4213 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4214 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4215 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4216 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4217 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4218 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4219 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4220 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4221 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4222 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4223 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4224 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4225 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4226 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4227 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4228 4229 /* check */ 4230 if (pcbddc->dbg_flag) { 4231 PetscReal error; 4232 Vec x,x_change; 4233 4234 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4235 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4236 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4237 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4238 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4239 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4241 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4242 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4243 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4244 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4245 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4246 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4248 ierr = VecDestroy(&x);CHKERRQ(ierr); 4249 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4250 } 4251 /* adapt sub_schurs computed (if any) */ 4252 if (pcbddc->use_deluxe_scaling) { 4253 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4254 4255 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4256 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4257 } 4258 if (sub_schurs->S_Ej_all) { 4259 Mat S_new,tmat; 4260 IS is_all_N,is_V_Sall = NULL; 4261 4262 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4263 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4264 if (pcbddc->deluxe_zerorows) { 4265 ISLocalToGlobalMapping NtoSall; 4266 IS is_V; 4267 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4268 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4269 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4270 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4271 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4272 } 4273 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4274 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4275 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4276 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4277 if (pcbddc->deluxe_zerorows) { 4278 const PetscScalar *array; 4279 const PetscInt *idxs_V,*idxs_all; 4280 PetscInt i,n_V; 4281 4282 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4283 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4284 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4285 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4286 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4287 for (i=0;i<n_V;i++) { 4288 PetscScalar val; 4289 PetscInt idx; 4290 4291 idx = idxs_V[i]; 4292 val = array[idxs_all[idxs_V[i]]]; 4293 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4294 } 4295 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4296 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4297 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4298 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4299 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4300 } 4301 sub_schurs->S_Ej_all = S_new; 4302 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4303 if (sub_schurs->sum_S_Ej_all) { 4304 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4305 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4306 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4307 if (pcbddc->deluxe_zerorows) { 4308 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4309 } 4310 sub_schurs->sum_S_Ej_all = S_new; 4311 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4312 } 4313 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4314 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4315 } 4316 /* destroy any change of basis context in sub_schurs */ 4317 if (sub_schurs->change) { 4318 PetscInt i; 4319 4320 for (i=0;i<sub_schurs->n_subs;i++) { 4321 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4322 } 4323 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4324 } 4325 } 4326 if (pcbddc->switch_static) { /* need to save the local change */ 4327 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4328 } else { 4329 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4330 } 4331 /* determine if any process has changed the pressures locally */ 4332 if (pcbddc->benign_saddle_point) { 4333 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4334 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4335 } 4336 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4337 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4338 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4339 pcbddc->use_qr_single = qr_needed; 4340 } 4341 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4342 PetscBool needglobal = PETSC_FALSE; 4343 if (pcbddc->benign_saddle_point) { 4344 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4345 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4346 } 4347 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4348 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4349 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4350 } else { 4351 Mat benign_global = NULL; 4352 if (needglobal) { 4353 Mat tmat; 4354 4355 pcbddc->change_interior = PETSC_TRUE; 4356 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4357 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4360 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4361 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4362 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4363 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4364 if (pcbddc->benign_change) { 4365 Mat M; 4366 4367 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4368 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4369 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4370 ierr = MatDestroy(&M);CHKERRQ(ierr); 4371 } else { 4372 Mat eye; 4373 PetscScalar *array; 4374 4375 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4376 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4377 for (i=0;i<pcis->n;i++) { 4378 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4379 } 4380 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4381 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4384 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4385 } 4386 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4387 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4388 } 4389 if (pcbddc->user_ChangeOfBasisMatrix) { 4390 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4391 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4392 } else if (needglobal) { 4393 pcbddc->ChangeOfBasisMatrix = benign_global; 4394 } 4395 } 4396 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4397 IS is_global; 4398 const PetscInt *gidxs; 4399 4400 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4401 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4402 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4403 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4404 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4405 } 4406 } 4407 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4408 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4409 } 4410 4411 if (!pcbddc->fake_change) { 4412 /* add pressure dofs to set of primal nodes for numbering purposes */ 4413 for (i=0;i<pcbddc->benign_n;i++) { 4414 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4415 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4416 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4417 pcbddc->local_primal_size_cc++; 4418 pcbddc->local_primal_size++; 4419 } 4420 4421 /* check if a new primal space has been introduced (also take into account benign trick) */ 4422 pcbddc->new_primal_space_local = PETSC_TRUE; 4423 if (olocal_primal_size == pcbddc->local_primal_size) { 4424 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4425 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4426 if (!pcbddc->new_primal_space_local) { 4427 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4428 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4429 } 4430 } 4431 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4432 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4433 } 4434 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4435 4436 /* flush dbg viewer */ 4437 if (pcbddc->dbg_flag) { 4438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4439 } 4440 4441 /* free workspace */ 4442 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4443 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4444 if (!pcbddc->adaptive_selection) { 4445 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4446 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4447 } else { 4448 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4449 pcbddc->adaptive_constraints_idxs_ptr, 4450 pcbddc->adaptive_constraints_data_ptr, 4451 pcbddc->adaptive_constraints_idxs, 4452 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4453 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4454 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4455 } 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4461 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4462 { 4463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4464 PC_IS *pcis = (PC_IS*)pc->data; 4465 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4466 PetscInt ierr,i,N; 4467 4468 PetscFunctionBegin; 4469 /* Reset previously computed graph */ 4470 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4471 /* Init local Graph struct */ 4472 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4473 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4474 4475 /* Check validity of the csr graph passed in by the user */ 4476 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4477 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4478 } 4479 4480 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4481 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4482 PetscInt *xadj,*adjncy; 4483 PetscInt nvtxs; 4484 PetscBool flg_row=PETSC_FALSE; 4485 4486 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4487 if (flg_row) { 4488 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4489 pcbddc->computed_rowadj = PETSC_TRUE; 4490 } 4491 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4492 } 4493 if (pcbddc->dbg_flag) { 4494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4495 } 4496 4497 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4498 if (pcbddc->user_provided_isfordofs) { 4499 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4500 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4501 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4502 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4503 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4504 } 4505 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4506 pcbddc->n_ISForDofs = 0; 4507 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4508 } 4509 } else { 4510 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4511 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4512 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4513 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4514 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4515 } 4516 } 4517 } 4518 4519 /* Setup of Graph */ 4520 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4521 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4522 } 4523 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4524 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4525 } 4526 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4527 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4528 } 4529 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4530 4531 /* attach info on disconnected subdomains if present */ 4532 if (pcbddc->n_local_subs) { 4533 PetscInt *local_subs; 4534 4535 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4536 for (i=0;i<pcbddc->n_local_subs;i++) { 4537 const PetscInt *idxs; 4538 PetscInt nl,j; 4539 4540 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4541 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4542 for (j=0;j<nl;j++) { 4543 local_subs[idxs[j]] = i; 4544 } 4545 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4546 } 4547 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4548 pcbddc->mat_graph->local_subs = local_subs; 4549 } 4550 4551 /* Graph's connected components analysis */ 4552 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4557 #undef __FUNCT__ 4558 #define __FUNCT__ "PCBDDCSubsetNumbering" 4559 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4560 { 4561 PetscSF sf; 4562 PetscLayout map; 4563 const PetscInt *idxs; 4564 PetscInt *leaf_data,*root_data,*gidxs; 4565 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4566 PetscInt n_n,nlocals,start,first_index; 4567 PetscMPIInt commsize; 4568 PetscBool first_found; 4569 PetscErrorCode ierr; 4570 4571 PetscFunctionBegin; 4572 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4573 if (subset_mult) { 4574 PetscCheckSameComm(subset,1,subset_mult,2); 4575 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4576 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4577 } 4578 /* create workspace layout for computing global indices of subset */ 4579 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4580 lbounds[0] = lbounds[1] = 0; 4581 for (i=0;i<n;i++) { 4582 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4583 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4584 } 4585 lbounds[0] = -lbounds[0]; 4586 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4587 gbounds[0] = -gbounds[0]; 4588 N = gbounds[1] - gbounds[0] + 1; 4589 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4590 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4591 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4592 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4593 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4594 4595 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4596 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4597 if (subset_mult) { 4598 const PetscInt* idxs_mult; 4599 4600 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4601 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4602 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4603 } else { 4604 for (i=0;i<n;i++) leaf_data[i] = 1; 4605 } 4606 /* local size of new subset */ 4607 n_n = 0; 4608 for (i=0;i<n;i++) n_n += leaf_data[i]; 4609 4610 /* global indexes in layout */ 4611 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4612 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4613 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4614 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4615 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4616 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4617 4618 /* reduce from leaves to roots */ 4619 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4620 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4621 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4622 4623 /* count indexes in local part of layout */ 4624 nlocals = 0; 4625 first_index = -1; 4626 first_found = PETSC_FALSE; 4627 for (i=0;i<Nl;i++) { 4628 if (!first_found && root_data[i]) { 4629 first_found = PETSC_TRUE; 4630 first_index = i; 4631 } 4632 nlocals += root_data[i]; 4633 } 4634 4635 /* cumulative of number of indexes and size of subset without holes */ 4636 #if defined(PETSC_HAVE_MPI_EXSCAN) 4637 start = 0; 4638 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4639 #else 4640 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4641 start = start-nlocals; 4642 #endif 4643 4644 if (N_n) { /* compute total size of new subset if requested */ 4645 *N_n = start + nlocals; 4646 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4647 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4648 } 4649 4650 /* adapt root data with cumulative */ 4651 if (first_found) { 4652 PetscInt old_index; 4653 4654 root_data[first_index] += start; 4655 old_index = first_index; 4656 for (i=first_index+1;i<Nl;i++) { 4657 if (root_data[i]) { 4658 root_data[i] += root_data[old_index]; 4659 old_index = i; 4660 } 4661 } 4662 } 4663 4664 /* from roots to leaves */ 4665 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4666 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4667 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4668 4669 /* create new IS with global indexes without holes */ 4670 if (subset_mult) { 4671 const PetscInt* idxs_mult; 4672 PetscInt cum; 4673 4674 cum = 0; 4675 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4676 for (i=0;i<n;i++) { 4677 PetscInt j; 4678 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4679 } 4680 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4681 } else { 4682 for (i=0;i<n;i++) { 4683 gidxs[i] = leaf_data[i]-1; 4684 } 4685 } 4686 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4687 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4688 PetscFunctionReturn(0); 4689 } 4690 4691 #undef __FUNCT__ 4692 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4693 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4694 { 4695 PetscInt i,j; 4696 PetscScalar *alphas; 4697 PetscErrorCode ierr; 4698 4699 PetscFunctionBegin; 4700 /* this implements stabilized Gram-Schmidt */ 4701 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4702 for (i=0;i<n;i++) { 4703 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4704 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4705 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4706 } 4707 ierr = PetscFree(alphas);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 #undef __FUNCT__ 4712 #define __FUNCT__ "MatISGetSubassemblingPattern" 4713 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4714 { 4715 Mat A; 4716 PetscInt n_neighs,*neighs,*n_shared,**shared; 4717 PetscMPIInt size,rank,color; 4718 PetscInt *xadj,*adjncy; 4719 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4720 PetscInt im_active,active_procs,n,i,local_size,threshold=0; 4721 PetscInt void_procs,*procs_candidates = NULL; 4722 PetscBool ismatis,use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4723 PetscSubcomm subcomm; 4724 PetscErrorCode ierr; 4725 4726 PetscFunctionBegin; 4727 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4728 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4729 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4730 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4731 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4732 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4733 4734 if (have_void) *have_void = PETSC_FALSE; 4735 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4736 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4737 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4738 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4739 im_active = !!(n); 4740 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4741 void_procs = size - active_procs; 4742 /* get ranks of of non-active processes in mat communicator */ 4743 if (void_procs) { 4744 PetscInt ncand; 4745 4746 if (have_void) *have_void = PETSC_TRUE; 4747 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4748 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4749 for (i=0,ncand=0;i<size;i++) { 4750 if (!procs_candidates[i]) { 4751 procs_candidates[ncand++] = i; 4752 } 4753 } 4754 /* force n_subdomains to be not greater that the number of non-active processes */ 4755 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4756 } 4757 4758 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4759 if (active_procs < *n_subdomains) { 4760 PetscInt issize,isidx; 4761 if (im_active) { 4762 issize = 1; 4763 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4764 isidx = procs_candidates[rank]; 4765 } else { 4766 isidx = rank; 4767 } 4768 } else { 4769 issize = 0; 4770 isidx = -1; 4771 } 4772 *n_subdomains = active_procs; 4773 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4774 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4775 PetscFunctionReturn(0); 4776 } 4777 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4778 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4779 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4780 4781 /* Get info on mapping */ 4782 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4783 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4784 4785 /* build local CSR graph of subdomains' connectivity */ 4786 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4787 xadj[0] = 0; 4788 xadj[1] = PetscMax(n_neighs-1,0); 4789 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4790 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4791 4792 if (threshold) { 4793 PetscInt xadj_count = 0; 4794 for (i=1;i<n_neighs;i++) { 4795 if (n_shared[i] > threshold) { 4796 adjncy[xadj_count] = neighs[i]; 4797 adjncy_wgt[xadj_count] = n_shared[i]; 4798 xadj_count++; 4799 } 4800 } 4801 xadj[1] = xadj_count; 4802 } else { 4803 if (xadj[1]) { 4804 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4805 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4806 } 4807 } 4808 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4809 if (use_square) { 4810 for (i=0;i<xadj[1];i++) { 4811 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4812 } 4813 } 4814 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4815 4816 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4817 4818 /* 4819 Restrict work on active processes only. 4820 */ 4821 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4822 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4823 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4824 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4825 if (color) { 4826 ierr = PetscFree(xadj);CHKERRQ(ierr); 4827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4829 } else { 4830 Mat subdomain_adj; 4831 IS new_ranks,new_ranks_contig; 4832 MatPartitioning partitioner; 4833 PetscInt prank,rstart=0,rend=0; 4834 PetscInt *is_indices,*oldranks; 4835 PetscMPIInt size; 4836 PetscBool aggregate; 4837 4838 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4839 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4840 prank = rank; 4841 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4842 /* 4843 for (i=0;i<size;i++) { 4844 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4845 } 4846 */ 4847 for (i=0;i<xadj[1];i++) { 4848 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4849 } 4850 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4851 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4852 if (aggregate) { 4853 PetscInt lrows,row,ncols,*cols; 4854 PetscMPIInt nrank; 4855 PetscScalar *vals; 4856 4857 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4858 lrows = 0; 4859 if (nrank<redprocs) { 4860 lrows = size/redprocs; 4861 if (nrank<size%redprocs) lrows++; 4862 } 4863 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4864 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4865 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4866 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4867 row = nrank; 4868 ncols = xadj[1]-xadj[0]; 4869 cols = adjncy; 4870 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4871 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4872 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4873 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4874 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4875 ierr = PetscFree(xadj);CHKERRQ(ierr); 4876 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4877 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4878 ierr = PetscFree(vals);CHKERRQ(ierr); 4879 } else { 4880 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4881 } 4882 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4883 4884 /* Partition */ 4885 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4886 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4887 if (use_vwgt) { 4888 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4889 v_wgt[0] = local_size; 4890 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4891 } 4892 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4893 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4894 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4895 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4896 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4897 4898 /* renumber new_ranks to avoid "holes" in new set of processors */ 4899 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4900 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4901 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4902 if (!aggregate) { 4903 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4904 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4905 } else { 4906 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4907 } 4908 } else { 4909 PetscInt idxs[1]; 4910 PetscMPIInt tag; 4911 MPI_Request *reqs; 4912 4913 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4914 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4915 for (i=rstart;i<rend;i++) { 4916 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4917 } 4918 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4919 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4920 ierr = PetscFree(reqs);CHKERRQ(ierr); 4921 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4922 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4923 } else { 4924 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4925 } 4926 } 4927 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4928 /* clean up */ 4929 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4930 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4931 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4932 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4933 } 4934 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4935 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4936 4937 /* assemble parallel IS for sends */ 4938 i = 1; 4939 if (color) i=0; 4940 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 4941 PetscFunctionReturn(0); 4942 } 4943 4944 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4945 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "MatISSubassemble" 4948 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4949 { 4950 Mat local_mat; 4951 IS is_sends_internal; 4952 PetscInt rows,cols,new_local_rows; 4953 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4954 PetscBool ismatis,isdense,newisdense,destroy_mat; 4955 ISLocalToGlobalMapping l2gmap; 4956 PetscInt* l2gmap_indices; 4957 const PetscInt* is_indices; 4958 MatType new_local_type; 4959 /* buffers */ 4960 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4961 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4962 PetscInt *recv_buffer_idxs_local; 4963 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4964 /* MPI */ 4965 MPI_Comm comm,comm_n; 4966 PetscSubcomm subcomm; 4967 PetscMPIInt n_sends,n_recvs,commsize; 4968 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4969 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4970 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4971 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4972 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4973 PetscErrorCode ierr; 4974 4975 PetscFunctionBegin; 4976 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4977 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4978 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4979 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4980 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4981 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 4982 PetscValidLogicalCollectiveBool(mat,reuse,6); 4983 PetscValidLogicalCollectiveInt(mat,nis,8); 4984 4985 /* further checks */ 4986 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4987 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4988 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4989 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4990 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4991 if (reuse && *mat_n) { 4992 PetscInt mrows,mcols,mnrows,mncols; 4993 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 4994 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4995 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4996 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4997 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4998 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4999 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5000 } 5001 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5002 PetscValidLogicalCollectiveInt(mat,bs,0); 5003 5004 /* prepare IS for sending if not provided */ 5005 if (!is_sends) { 5006 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5007 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5008 } else { 5009 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5010 is_sends_internal = is_sends; 5011 } 5012 5013 /* get comm */ 5014 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5015 5016 /* compute number of sends */ 5017 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5018 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5019 5020 /* compute number of receives */ 5021 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5022 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5023 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5024 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5025 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5026 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5027 ierr = PetscFree(iflags);CHKERRQ(ierr); 5028 5029 /* restrict comm if requested */ 5030 subcomm = 0; 5031 destroy_mat = PETSC_FALSE; 5032 if (restrict_comm) { 5033 PetscMPIInt color,subcommsize; 5034 5035 color = 0; 5036 if (restrict_full) { 5037 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5038 } else { 5039 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5040 } 5041 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5042 subcommsize = commsize - subcommsize; 5043 /* check if reuse has been requested */ 5044 if (reuse) { 5045 if (*mat_n) { 5046 PetscMPIInt subcommsize2; 5047 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5048 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5049 comm_n = PetscObjectComm((PetscObject)*mat_n); 5050 } else { 5051 comm_n = PETSC_COMM_SELF; 5052 } 5053 } else { /* MAT_INITIAL_MATRIX */ 5054 PetscMPIInt rank; 5055 5056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5057 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5058 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5059 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5060 comm_n = PetscSubcommChild(subcomm); 5061 } 5062 /* flag to destroy *mat_n if not significative */ 5063 if (color) destroy_mat = PETSC_TRUE; 5064 } else { 5065 comm_n = comm; 5066 } 5067 5068 /* prepare send/receive buffers */ 5069 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5070 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5071 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5072 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5073 if (nis) { 5074 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5075 } 5076 5077 /* Get data from local matrices */ 5078 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5079 /* TODO: See below some guidelines on how to prepare the local buffers */ 5080 /* 5081 send_buffer_vals should contain the raw values of the local matrix 5082 send_buffer_idxs should contain: 5083 - MatType_PRIVATE type 5084 - PetscInt size_of_l2gmap 5085 - PetscInt global_row_indices[size_of_l2gmap] 5086 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5087 */ 5088 else { 5089 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5090 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5091 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5092 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5093 send_buffer_idxs[1] = i; 5094 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5095 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5096 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5097 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5098 for (i=0;i<n_sends;i++) { 5099 ilengths_vals[is_indices[i]] = len*len; 5100 ilengths_idxs[is_indices[i]] = len+2; 5101 } 5102 } 5103 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5104 /* additional is (if any) */ 5105 if (nis) { 5106 PetscMPIInt psum; 5107 PetscInt j; 5108 for (j=0,psum=0;j<nis;j++) { 5109 PetscInt plen; 5110 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5111 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5112 psum += len+1; /* indices + lenght */ 5113 } 5114 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5115 for (j=0,psum=0;j<nis;j++) { 5116 PetscInt plen; 5117 const PetscInt *is_array_idxs; 5118 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5119 send_buffer_idxs_is[psum] = plen; 5120 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5121 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5122 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5123 psum += plen+1; /* indices + lenght */ 5124 } 5125 for (i=0;i<n_sends;i++) { 5126 ilengths_idxs_is[is_indices[i]] = psum; 5127 } 5128 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5129 } 5130 5131 buf_size_idxs = 0; 5132 buf_size_vals = 0; 5133 buf_size_idxs_is = 0; 5134 for (i=0;i<n_recvs;i++) { 5135 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5136 buf_size_vals += (PetscInt)olengths_vals[i]; 5137 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5138 } 5139 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5140 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5141 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5142 5143 /* get new tags for clean communications */ 5144 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5145 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5146 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5147 5148 /* allocate for requests */ 5149 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5150 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5151 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5152 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5153 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5154 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5155 5156 /* communications */ 5157 ptr_idxs = recv_buffer_idxs; 5158 ptr_vals = recv_buffer_vals; 5159 ptr_idxs_is = recv_buffer_idxs_is; 5160 for (i=0;i<n_recvs;i++) { 5161 source_dest = onodes[i]; 5162 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5163 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5164 ptr_idxs += olengths_idxs[i]; 5165 ptr_vals += olengths_vals[i]; 5166 if (nis) { 5167 source_dest = onodes_is[i]; 5168 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5169 ptr_idxs_is += olengths_idxs_is[i]; 5170 } 5171 } 5172 for (i=0;i<n_sends;i++) { 5173 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5174 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5175 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5176 if (nis) { 5177 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5178 } 5179 } 5180 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5181 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5182 5183 /* assemble new l2g map */ 5184 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5185 ptr_idxs = recv_buffer_idxs; 5186 new_local_rows = 0; 5187 for (i=0;i<n_recvs;i++) { 5188 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5189 ptr_idxs += olengths_idxs[i]; 5190 } 5191 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5192 ptr_idxs = recv_buffer_idxs; 5193 new_local_rows = 0; 5194 for (i=0;i<n_recvs;i++) { 5195 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5196 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5197 ptr_idxs += olengths_idxs[i]; 5198 } 5199 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5200 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5201 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5202 5203 /* infer new local matrix type from received local matrices type */ 5204 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5205 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5206 if (n_recvs) { 5207 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5208 ptr_idxs = recv_buffer_idxs; 5209 for (i=0;i<n_recvs;i++) { 5210 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5211 new_local_type_private = MATAIJ_PRIVATE; 5212 break; 5213 } 5214 ptr_idxs += olengths_idxs[i]; 5215 } 5216 switch (new_local_type_private) { 5217 case MATDENSE_PRIVATE: 5218 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5219 new_local_type = MATSEQAIJ; 5220 bs = 1; 5221 } else { /* if I receive only 1 dense matrix */ 5222 new_local_type = MATSEQDENSE; 5223 bs = 1; 5224 } 5225 break; 5226 case MATAIJ_PRIVATE: 5227 new_local_type = MATSEQAIJ; 5228 bs = 1; 5229 break; 5230 case MATBAIJ_PRIVATE: 5231 new_local_type = MATSEQBAIJ; 5232 break; 5233 case MATSBAIJ_PRIVATE: 5234 new_local_type = MATSEQSBAIJ; 5235 break; 5236 default: 5237 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5238 break; 5239 } 5240 } else { /* by default, new_local_type is seqdense */ 5241 new_local_type = MATSEQDENSE; 5242 bs = 1; 5243 } 5244 5245 /* create MATIS object if needed */ 5246 if (!reuse) { 5247 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5248 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5249 } else { 5250 /* it also destroys the local matrices */ 5251 if (*mat_n) { 5252 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5253 } else { /* this is a fake object */ 5254 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5255 } 5256 } 5257 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5258 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5259 5260 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5261 5262 /* Global to local map of received indices */ 5263 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5264 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5265 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5266 5267 /* restore attributes -> type of incoming data and its size */ 5268 buf_size_idxs = 0; 5269 for (i=0;i<n_recvs;i++) { 5270 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5271 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5272 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5273 } 5274 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5275 5276 /* set preallocation */ 5277 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5278 if (!newisdense) { 5279 PetscInt *new_local_nnz=0; 5280 5281 ptr_vals = recv_buffer_vals; 5282 ptr_idxs = recv_buffer_idxs_local; 5283 if (n_recvs) { 5284 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5285 } 5286 for (i=0;i<n_recvs;i++) { 5287 PetscInt j; 5288 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5289 for (j=0;j<*(ptr_idxs+1);j++) { 5290 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5291 } 5292 } else { 5293 /* TODO */ 5294 } 5295 ptr_idxs += olengths_idxs[i]; 5296 } 5297 if (new_local_nnz) { 5298 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5299 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5300 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5301 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5302 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5303 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5304 } else { 5305 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5306 } 5307 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5308 } else { 5309 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5310 } 5311 5312 /* set values */ 5313 ptr_vals = recv_buffer_vals; 5314 ptr_idxs = recv_buffer_idxs_local; 5315 for (i=0;i<n_recvs;i++) { 5316 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5317 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5318 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5319 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5320 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5321 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5322 } else { 5323 /* TODO */ 5324 } 5325 ptr_idxs += olengths_idxs[i]; 5326 ptr_vals += olengths_vals[i]; 5327 } 5328 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5329 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5330 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5331 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5332 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5333 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5334 5335 #if 0 5336 if (!restrict_comm) { /* check */ 5337 Vec lvec,rvec; 5338 PetscReal infty_error; 5339 5340 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5341 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5342 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5343 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5344 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5345 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5346 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5347 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5348 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5349 } 5350 #endif 5351 5352 /* assemble new additional is (if any) */ 5353 if (nis) { 5354 PetscInt **temp_idxs,*count_is,j,psum; 5355 5356 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5357 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5358 ptr_idxs = recv_buffer_idxs_is; 5359 psum = 0; 5360 for (i=0;i<n_recvs;i++) { 5361 for (j=0;j<nis;j++) { 5362 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5363 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5364 psum += plen; 5365 ptr_idxs += plen+1; /* shift pointer to received data */ 5366 } 5367 } 5368 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5369 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5370 for (i=1;i<nis;i++) { 5371 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5372 } 5373 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5374 ptr_idxs = recv_buffer_idxs_is; 5375 for (i=0;i<n_recvs;i++) { 5376 for (j=0;j<nis;j++) { 5377 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5378 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5379 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5380 ptr_idxs += plen+1; /* shift pointer to received data */ 5381 } 5382 } 5383 for (i=0;i<nis;i++) { 5384 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5385 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5386 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5387 } 5388 ierr = PetscFree(count_is);CHKERRQ(ierr); 5389 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5390 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5391 } 5392 /* free workspace */ 5393 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5394 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5395 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5396 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5397 if (isdense) { 5398 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5399 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5400 } else { 5401 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5402 } 5403 if (nis) { 5404 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5405 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5406 } 5407 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5408 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5409 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5410 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5411 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5412 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5413 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5414 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5415 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5416 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5417 ierr = PetscFree(onodes);CHKERRQ(ierr); 5418 if (nis) { 5419 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5420 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5421 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5422 } 5423 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5424 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5425 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5426 for (i=0;i<nis;i++) { 5427 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5428 } 5429 *mat_n = NULL; 5430 } 5431 PetscFunctionReturn(0); 5432 } 5433 5434 /* temporary hack into ksp private data structure */ 5435 #include <petsc/private/kspimpl.h> 5436 5437 #undef __FUNCT__ 5438 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5439 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5440 { 5441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5442 PC_IS *pcis = (PC_IS*)pc->data; 5443 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5444 MatNullSpace CoarseNullSpace=NULL; 5445 ISLocalToGlobalMapping coarse_islg; 5446 IS coarse_is,*isarray; 5447 PetscInt i,im_active=-1,active_procs=-1; 5448 PetscInt nis,nisdofs,nisneu,nisvert; 5449 PC pc_temp; 5450 PCType coarse_pc_type; 5451 KSPType coarse_ksp_type; 5452 PetscBool multilevel_requested,multilevel_allowed; 5453 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5454 Mat t_coarse_mat_is; 5455 PetscInt ncoarse; 5456 PetscBool compute_vecs = PETSC_FALSE; 5457 PetscScalar *array; 5458 MatReuse coarse_mat_reuse; 5459 PetscBool restr, full_restr, have_void; 5460 PetscErrorCode ierr; 5461 5462 PetscFunctionBegin; 5463 /* Assign global numbering to coarse dofs */ 5464 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5465 PetscInt ocoarse_size; 5466 compute_vecs = PETSC_TRUE; 5467 ocoarse_size = pcbddc->coarse_size; 5468 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5469 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5470 /* see if we can avoid some work */ 5471 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5472 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5473 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5474 PC pc; 5475 PetscBool isbddc; 5476 5477 /* temporary workaround since PCBDDC does not have a reset method so far */ 5478 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5479 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5480 if (isbddc) { 5481 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5482 } else { 5483 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5484 } 5485 coarse_reuse = PETSC_FALSE; 5486 } else { /* we can safely reuse already computed coarse matrix */ 5487 coarse_reuse = PETSC_TRUE; 5488 } 5489 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5490 coarse_reuse = PETSC_FALSE; 5491 } 5492 /* reset any subassembling information */ 5493 if (!coarse_reuse || pcbddc->recompute_topography) { 5494 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5495 } 5496 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5497 coarse_reuse = PETSC_TRUE; 5498 } 5499 /* assemble coarse matrix */ 5500 if (coarse_reuse && pcbddc->coarse_ksp) { 5501 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5502 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5503 coarse_mat_reuse = MAT_REUSE_MATRIX; 5504 } else { 5505 coarse_mat = NULL; 5506 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5507 } 5508 5509 /* creates temporary l2gmap and IS for coarse indexes */ 5510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5511 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5512 5513 /* creates temporary MATIS object for coarse matrix */ 5514 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5515 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5516 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5517 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5518 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5519 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5520 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5521 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5522 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5523 5524 /* count "active" (i.e. with positive local size) and "void" processes */ 5525 im_active = !!(pcis->n); 5526 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5527 5528 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5529 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5530 /* full_restr : just use the receivers from the subassembling pattern */ 5531 coarse_mat_is = NULL; 5532 multilevel_allowed = PETSC_FALSE; 5533 multilevel_requested = PETSC_FALSE; 5534 full_restr = PETSC_TRUE; 5535 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5536 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5537 if (multilevel_requested) { 5538 ncoarse = active_procs/pcbddc->coarsening_ratio; 5539 restr = PETSC_FALSE; 5540 full_restr = PETSC_FALSE; 5541 } else { 5542 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5543 restr = PETSC_TRUE; 5544 full_restr = PETSC_TRUE; 5545 } 5546 ncoarse = PetscMax(1,ncoarse); 5547 if (!pcbddc->coarse_subassembling) { 5548 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5549 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5550 PetscInt psum; 5551 PetscMPIInt size; 5552 if (pcbddc->coarse_ksp) psum = 1; 5553 else psum = 0; 5554 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5555 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5556 if (ncoarse < size) have_void = PETSC_TRUE; 5557 } 5558 /* determine if we can go multilevel */ 5559 if (multilevel_requested) { 5560 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5561 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5562 } 5563 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5564 5565 /* dump subassembling pattern */ 5566 if (pcbddc->dbg_flag && multilevel_allowed) { 5567 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5568 } 5569 5570 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5571 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5572 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5573 const PetscInt *idxs; 5574 ISLocalToGlobalMapping tmap; 5575 5576 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5577 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5578 /* allocate space for temporary storage */ 5579 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5580 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5581 /* allocate for IS array */ 5582 nisdofs = pcbddc->n_ISForDofsLocal; 5583 nisneu = !!pcbddc->NeumannBoundariesLocal; 5584 nisvert = 0; 5585 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5586 nisvert = 1; 5587 } 5588 nis = nisdofs + nisneu + nisvert; 5589 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5590 /* dofs splitting */ 5591 for (i=0;i<nisdofs;i++) { 5592 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5593 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5594 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5595 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5596 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5597 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5598 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5599 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5600 } 5601 /* neumann boundaries */ 5602 if (pcbddc->NeumannBoundariesLocal) { 5603 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5604 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5605 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5606 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5607 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5608 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5609 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5610 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5611 } 5612 /* primal vertices (benign) */ 5613 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5614 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5615 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5616 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5617 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5618 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5619 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5620 /* ierr = ISView(isarray[nis-1],0);CHKERRQ(ierr); */ 5621 } 5622 /* free memory */ 5623 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5624 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5625 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5626 } else { 5627 nis = 0; 5628 nisdofs = 0; 5629 nisneu = 0; 5630 nisvert = 0; 5631 isarray = NULL; 5632 } 5633 /* destroy no longer needed map */ 5634 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5635 5636 /* subassemble */ 5637 if (multilevel_allowed) { 5638 PetscBool reuse,reuser; 5639 if (coarse_mat) reuse = PETSC_TRUE; 5640 else reuse = PETSC_FALSE; 5641 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5642 if (reuser) { 5643 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5644 } else { 5645 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5646 } 5647 } else { 5648 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5649 } 5650 if (coarse_mat_is || coarse_mat) { 5651 PetscMPIInt size; 5652 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5653 if (!multilevel_allowed) { 5654 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5655 } else { 5656 Mat A; 5657 5658 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5659 if (coarse_mat_is) { 5660 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5661 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5662 coarse_mat = coarse_mat_is; 5663 } 5664 /* be sure we don't have MatSeqDENSE as local mat */ 5665 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5666 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5667 } 5668 } 5669 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5670 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5671 5672 /* create local to global scatters for coarse problem */ 5673 if (compute_vecs) { 5674 PetscInt lrows; 5675 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5676 if (coarse_mat) { 5677 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5678 } else { 5679 lrows = 0; 5680 } 5681 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5682 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5683 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5684 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5685 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5686 } 5687 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5688 5689 /* set defaults for coarse KSP and PC */ 5690 if (multilevel_allowed) { 5691 coarse_ksp_type = KSPRICHARDSON; 5692 coarse_pc_type = PCBDDC; 5693 } else { 5694 coarse_ksp_type = KSPPREONLY; 5695 coarse_pc_type = PCREDUNDANT; 5696 } 5697 5698 /* print some info if requested */ 5699 if (pcbddc->dbg_flag) { 5700 if (!multilevel_allowed) { 5701 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5702 if (multilevel_requested) { 5703 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5704 } else if (pcbddc->max_levels) { 5705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5706 } 5707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5708 } 5709 } 5710 5711 /* create the coarse KSP object only once with defaults */ 5712 if (coarse_mat) { 5713 PetscViewer dbg_viewer = NULL; 5714 if (pcbddc->dbg_flag) { 5715 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5716 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5717 } 5718 if (!pcbddc->coarse_ksp) { 5719 char prefix[256],str_level[16]; 5720 size_t len; 5721 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5722 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5723 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5724 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5725 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5726 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5727 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5728 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5729 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5730 /* prefix */ 5731 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5732 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5733 if (!pcbddc->current_level) { 5734 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5735 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5736 } else { 5737 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5738 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5739 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5740 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5741 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5742 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5743 } 5744 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5745 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5746 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5747 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5748 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5749 /* allow user customization */ 5750 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5751 } 5752 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5753 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5754 if (nisdofs) { 5755 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5756 for (i=0;i<nisdofs;i++) { 5757 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5758 } 5759 } 5760 if (nisneu) { 5761 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5762 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5763 } 5764 if (nisvert) { 5765 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5766 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5767 } 5768 5769 /* get some info after set from options */ 5770 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5771 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5772 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5773 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5774 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5775 isbddc = PETSC_FALSE; 5776 } 5777 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5778 if (isredundant) { 5779 KSP inner_ksp; 5780 PC inner_pc; 5781 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5782 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5783 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5784 } 5785 5786 /* parameters which miss an API */ 5787 if (isbddc) { 5788 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5789 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5790 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5791 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5792 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5793 if (pcbddc_coarse->benign_compute_nonetflux) { 5794 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5795 } 5796 } 5797 5798 /* propagate symmetry info of coarse matrix */ 5799 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5800 if (pc->pmat->symmetric_set) { 5801 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5802 } 5803 if (pc->pmat->hermitian_set) { 5804 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5805 } 5806 if (pc->pmat->spd_set) { 5807 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5808 } 5809 /* set operators */ 5810 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5811 if (pcbddc->dbg_flag) { 5812 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5813 } 5814 } 5815 ierr = PetscFree(isarray);CHKERRQ(ierr); 5816 #if 0 5817 { 5818 PetscViewer viewer; 5819 char filename[256]; 5820 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5821 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5822 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5823 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5824 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5825 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5826 } 5827 #endif 5828 5829 /* Compute coarse null space (special handling by BDDC only) */ 5830 #if 0 5831 if (pcbddc->NullSpace) { 5832 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5833 } 5834 #endif 5835 /* hack */ 5836 if (pcbddc->coarse_ksp) { 5837 Vec crhs,csol; 5838 5839 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5840 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5841 if (!csol) { 5842 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5843 } 5844 if (!crhs) { 5845 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5846 } 5847 } 5848 5849 /* compute null space for coarse solver if the benign trick has been requested */ 5850 if (pcbddc->benign_null) { 5851 5852 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5853 for (i=0;i<pcbddc->benign_n;i++) { 5854 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5855 } 5856 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5857 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5858 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5859 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5860 if (coarse_mat) { 5861 Vec nullv; 5862 PetscScalar *array,*array2; 5863 PetscInt nl; 5864 5865 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5866 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5867 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5868 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5869 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5870 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5871 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5872 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5873 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5874 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5875 } 5876 } 5877 5878 if (pcbddc->coarse_ksp) { 5879 PetscBool ispreonly; 5880 5881 if (CoarseNullSpace) { 5882 PetscBool isnull; 5883 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5884 if (1) { 5885 if (isbddc && !pcbddc->benign_saddle_point) { 5886 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5887 } else { 5888 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5889 } 5890 } else { 5891 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5892 } 5893 } 5894 /* setup coarse ksp */ 5895 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5896 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5897 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5898 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5899 KSP check_ksp; 5900 KSPType check_ksp_type; 5901 PC check_pc; 5902 Vec check_vec,coarse_vec; 5903 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5904 PetscInt its; 5905 PetscBool compute_eigs; 5906 PetscReal *eigs_r,*eigs_c; 5907 PetscInt neigs; 5908 const char *prefix; 5909 5910 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5911 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5912 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5913 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5914 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5915 /* prevent from setup unneeded object */ 5916 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 5917 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 5918 if (ispreonly) { 5919 check_ksp_type = KSPPREONLY; 5920 compute_eigs = PETSC_FALSE; 5921 } else { 5922 check_ksp_type = KSPGMRES; 5923 compute_eigs = PETSC_TRUE; 5924 } 5925 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5926 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5927 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5928 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5929 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5930 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5931 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5932 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5933 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5934 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5935 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5936 /* create random vec */ 5937 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5938 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5939 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5940 if (CoarseNullSpace) { 5941 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5942 } 5943 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5944 /* solve coarse problem */ 5945 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5946 if (CoarseNullSpace) { 5947 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5948 } 5949 /* set eigenvalue estimation if preonly has not been requested */ 5950 if (compute_eigs) { 5951 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5952 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5953 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5954 lambda_max = eigs_r[neigs-1]; 5955 lambda_min = eigs_r[0]; 5956 if (pcbddc->use_coarse_estimates) { 5957 if (lambda_max>lambda_min) { 5958 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5959 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5960 } 5961 } 5962 } 5963 5964 /* check coarse problem residual error */ 5965 if (pcbddc->dbg_flag) { 5966 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5967 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5968 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5969 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5970 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5971 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5972 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5973 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5974 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5975 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5976 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5977 if (CoarseNullSpace) { 5978 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5979 } 5980 if (compute_eigs) { 5981 PetscReal lambda_max_s,lambda_min_s; 5982 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5983 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5984 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5985 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5986 for (i=0;i<neigs;i++) { 5987 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5988 } 5989 } 5990 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5992 } 5993 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5994 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5995 if (compute_eigs) { 5996 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5997 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5998 } 5999 } 6000 } 6001 /* print additional info */ 6002 if (pcbddc->dbg_flag) { 6003 /* waits until all processes reaches this point */ 6004 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6005 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6006 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6007 } 6008 6009 /* free memory */ 6010 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6011 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6012 PetscFunctionReturn(0); 6013 } 6014 6015 #undef __FUNCT__ 6016 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6017 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6018 { 6019 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6020 PC_IS* pcis = (PC_IS*)pc->data; 6021 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6022 IS subset,subset_mult,subset_n; 6023 PetscInt local_size,coarse_size=0; 6024 PetscInt *local_primal_indices=NULL; 6025 const PetscInt *t_local_primal_indices; 6026 PetscErrorCode ierr; 6027 6028 PetscFunctionBegin; 6029 /* Compute global number of coarse dofs */ 6030 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6031 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6032 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6033 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6034 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6035 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6036 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6037 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6038 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6039 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6040 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6041 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6042 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6043 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6044 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6045 6046 /* check numbering */ 6047 if (pcbddc->dbg_flag) { 6048 PetscScalar coarsesum,*array,*array2; 6049 PetscInt i; 6050 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6051 6052 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6054 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6055 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6056 /* counter */ 6057 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6058 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6059 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6060 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6061 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6062 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6063 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6064 for (i=0;i<pcbddc->local_primal_size;i++) { 6065 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6066 } 6067 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6068 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6069 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6070 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6071 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6074 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6075 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6076 for (i=0;i<pcis->n;i++) { 6077 if (array[i] != 0.0 && array[i] != array2[i]) { 6078 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6079 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6080 set_error = PETSC_TRUE; 6081 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6083 } 6084 } 6085 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6086 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6087 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6088 for (i=0;i<pcis->n;i++) { 6089 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6090 } 6091 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6092 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6093 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6094 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6095 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6097 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6098 PetscInt *gidxs; 6099 6100 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6101 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6103 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6105 for (i=0;i<pcbddc->local_primal_size;i++) { 6106 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6107 } 6108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6109 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6110 } 6111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6112 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6113 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6114 } 6115 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6116 /* get back data */ 6117 *coarse_size_n = coarse_size; 6118 *local_primal_indices_n = local_primal_indices; 6119 PetscFunctionReturn(0); 6120 } 6121 6122 #undef __FUNCT__ 6123 #define __FUNCT__ "PCBDDCGlobalToLocal" 6124 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6125 { 6126 IS localis_t; 6127 PetscInt i,lsize,*idxs,n; 6128 PetscScalar *vals; 6129 PetscErrorCode ierr; 6130 6131 PetscFunctionBegin; 6132 /* get indices in local ordering exploiting local to global map */ 6133 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6134 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6135 for (i=0;i<lsize;i++) vals[i] = 1.0; 6136 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6137 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6138 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6139 if (idxs) { /* multilevel guard */ 6140 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6141 } 6142 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6143 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6144 ierr = PetscFree(vals);CHKERRQ(ierr); 6145 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6146 /* now compute set in local ordering */ 6147 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6148 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6149 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6150 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6151 for (i=0,lsize=0;i<n;i++) { 6152 if (PetscRealPart(vals[i]) > 0.5) { 6153 lsize++; 6154 } 6155 } 6156 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6157 for (i=0,lsize=0;i<n;i++) { 6158 if (PetscRealPart(vals[i]) > 0.5) { 6159 idxs[lsize++] = i; 6160 } 6161 } 6162 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6163 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6164 *localis = localis_t; 6165 PetscFunctionReturn(0); 6166 } 6167 6168 #undef __FUNCT__ 6169 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6170 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6171 { 6172 PC_IS *pcis=(PC_IS*)pc->data; 6173 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6174 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6175 Mat S_j; 6176 PetscInt *used_xadj,*used_adjncy; 6177 PetscBool free_used_adj; 6178 PetscErrorCode ierr; 6179 6180 PetscFunctionBegin; 6181 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6182 free_used_adj = PETSC_FALSE; 6183 if (pcbddc->sub_schurs_layers == -1) { 6184 used_xadj = NULL; 6185 used_adjncy = NULL; 6186 } else { 6187 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6188 used_xadj = pcbddc->mat_graph->xadj; 6189 used_adjncy = pcbddc->mat_graph->adjncy; 6190 } else if (pcbddc->computed_rowadj) { 6191 used_xadj = pcbddc->mat_graph->xadj; 6192 used_adjncy = pcbddc->mat_graph->adjncy; 6193 } else { 6194 PetscBool flg_row=PETSC_FALSE; 6195 const PetscInt *xadj,*adjncy; 6196 PetscInt nvtxs; 6197 6198 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6199 if (flg_row) { 6200 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6201 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6202 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6203 free_used_adj = PETSC_TRUE; 6204 } else { 6205 pcbddc->sub_schurs_layers = -1; 6206 used_xadj = NULL; 6207 used_adjncy = NULL; 6208 } 6209 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6210 } 6211 } 6212 6213 /* setup sub_schurs data */ 6214 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6215 if (!sub_schurs->schur_explicit) { 6216 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6217 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6218 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6219 } else { 6220 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6221 PetscBool isseqaij,need_change = PETSC_FALSE;; 6222 PetscInt benign_n; 6223 Mat change = NULL; 6224 Vec scaling = NULL; 6225 IS change_primal = NULL; 6226 6227 if (!pcbddc->use_vertices && reuse_solvers) { 6228 PetscInt n_vertices; 6229 6230 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6231 reuse_solvers = (PetscBool)!n_vertices; 6232 } 6233 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6234 if (!isseqaij) { 6235 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6236 if (matis->A == pcbddc->local_mat) { 6237 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6238 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6239 } else { 6240 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6241 } 6242 } 6243 if (!pcbddc->benign_change_explicit) { 6244 benign_n = pcbddc->benign_n; 6245 } else { 6246 benign_n = 0; 6247 } 6248 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6249 We need a global reduction to avoid possible deadlocks. 6250 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6251 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6252 PetscBool have_loc_change = !!(sub_schurs->change); 6253 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6254 need_change = !need_change; 6255 } 6256 /* If the user defines additional constraints, we import them here. 6257 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6258 if (need_change) { 6259 PC_IS *pcisf; 6260 PC_BDDC *pcbddcf; 6261 PC pcf; 6262 6263 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6264 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6265 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6266 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6267 /* hacks */ 6268 pcisf = (PC_IS*)pcf->data; 6269 pcisf->is_B_local = pcis->is_B_local; 6270 pcisf->vec1_N = pcis->vec1_N; 6271 pcisf->BtoNmap = pcis->BtoNmap; 6272 pcisf->n = pcis->n; 6273 pcisf->n_B = pcis->n_B; 6274 pcbddcf = (PC_BDDC*)pcf->data; 6275 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6276 pcbddcf->mat_graph = pcbddc->mat_graph; 6277 pcbddcf->use_faces = PETSC_TRUE; 6278 pcbddcf->use_change_of_basis = PETSC_TRUE; 6279 pcbddcf->use_change_on_faces = PETSC_TRUE; 6280 pcbddcf->use_qr_single = PETSC_TRUE; 6281 pcbddcf->fake_change = PETSC_TRUE; 6282 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6283 /* store information on primal vertices and change of basis (in local numbering) */ 6284 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6285 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6286 change = pcbddcf->ConstraintMatrix; 6287 pcbddcf->ConstraintMatrix = NULL; 6288 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6289 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6290 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6291 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6292 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6293 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6294 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6295 pcf->ops->destroy = NULL; 6296 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6297 } 6298 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6299 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6300 ierr = MatDestroy(&change);CHKERRQ(ierr); 6301 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6302 } 6303 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6304 6305 /* free adjacency */ 6306 if (free_used_adj) { 6307 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6308 } 6309 PetscFunctionReturn(0); 6310 } 6311 6312 #undef __FUNCT__ 6313 #define __FUNCT__ "PCBDDCInitSubSchurs" 6314 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6315 { 6316 PC_IS *pcis=(PC_IS*)pc->data; 6317 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6318 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6319 PCBDDCGraph graph; 6320 PetscErrorCode ierr; 6321 6322 PetscFunctionBegin; 6323 /* attach interface graph for determining subsets */ 6324 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6325 IS verticesIS,verticescomm; 6326 PetscInt vsize,*idxs; 6327 6328 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6329 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6330 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6332 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6333 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6334 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6335 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6336 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6337 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6338 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6339 } else { 6340 graph = pcbddc->mat_graph; 6341 } 6342 /* print some info */ 6343 if (pcbddc->dbg_flag) { 6344 IS vertices; 6345 PetscInt nv,nedges,nfaces; 6346 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6347 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6348 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6349 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6350 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6355 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6356 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6357 } 6358 6359 /* sub_schurs init */ 6360 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6361 6362 /* free graph struct */ 6363 if (pcbddc->sub_schurs_rebuild) { 6364 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6365 } 6366 PetscFunctionReturn(0); 6367 } 6368 6369 #undef __FUNCT__ 6370 #define __FUNCT__ "PCBDDCCheckOperator" 6371 PetscErrorCode PCBDDCCheckOperator(PC pc) 6372 { 6373 PC_IS *pcis=(PC_IS*)pc->data; 6374 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6375 PetscErrorCode ierr; 6376 6377 PetscFunctionBegin; 6378 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6379 IS zerodiag = NULL; 6380 Mat S_j,B0_B=NULL; 6381 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6382 PetscScalar *p0_check,*array,*array2; 6383 PetscReal norm; 6384 PetscInt i; 6385 6386 /* B0 and B0_B */ 6387 if (zerodiag) { 6388 IS dummy; 6389 6390 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6391 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6392 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6393 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6394 } 6395 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6396 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6397 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6398 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6399 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6400 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6401 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6402 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6403 /* S_j */ 6404 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6405 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6406 6407 /* mimic vector in \widetilde{W}_\Gamma */ 6408 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6409 /* continuous in primal space */ 6410 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6411 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6412 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6413 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6414 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6415 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6416 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6417 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6418 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6419 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6420 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6421 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6422 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6423 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6424 6425 /* assemble rhs for coarse problem */ 6426 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6427 /* local with Schur */ 6428 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6429 if (zerodiag) { 6430 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6431 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6432 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6433 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6434 } 6435 /* sum on primal nodes the local contributions */ 6436 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6437 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6438 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6439 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6440 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6441 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6442 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6443 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6444 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6445 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6446 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6447 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6448 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6449 /* scale primal nodes (BDDC sums contibutions) */ 6450 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6451 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6452 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6453 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6454 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6455 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6456 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6457 /* global: \widetilde{B0}_B w_\Gamma */ 6458 if (zerodiag) { 6459 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6460 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6461 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6462 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6463 } 6464 /* BDDC */ 6465 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6466 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6467 6468 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6469 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6470 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6471 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6472 for (i=0;i<pcbddc->benign_n;i++) { 6473 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6474 } 6475 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6476 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6477 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6478 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6479 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6480 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6481 } 6482 PetscFunctionReturn(0); 6483 } 6484