1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (!pcbddc->current_level && PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 PetscInt n; 586 587 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 588 recompute_zerodiag = PETSC_FALSE; 589 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 590 if (n) { 591 has_null_pressures = PETSC_FALSE; 592 have_null = PETSC_FALSE; 593 } 594 } 595 596 /* final check for null pressures */ 597 if (zerodiag && pressures) { 598 PetscInt nz,np; 599 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 600 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 601 if (nz != np) have_null = PETSC_FALSE; 602 } 603 604 if (recompute_zerodiag) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 if (pcbddc->benign_n == 1) { 607 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 608 zerodiag = zerodiag_subs[0]; 609 } else { 610 PetscInt i,nzn,*new_idxs; 611 612 nzn = 0; 613 for (i=0;i<pcbddc->benign_n;i++) { 614 PetscInt ns; 615 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 616 nzn += ns; 617 } 618 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 619 nzn = 0; 620 for (i=0;i<pcbddc->benign_n;i++) { 621 PetscInt ns,*idxs; 622 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 623 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 624 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 625 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 626 nzn += ns; 627 } 628 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 629 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 630 } 631 have_null = PETSC_FALSE; 632 } 633 634 /* no-net-flux */ 635 if (pcbddc->benign_compute_nonetflux) { 636 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 637 MatNullSpace near_null_space; 638 ISLocalToGlobalMapping rmap; 639 Vec quad_vec; 640 PetscScalar *pvals; 641 PetscInt i,np,*dummyins; 642 643 /* create vector to hold quadrature weights */ 644 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 645 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 646 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 647 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 648 649 /* compute B^{(i)T} * 1_p */ 650 np = 0; 651 if (zerodiag) { 652 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 653 } else if (pressures) { 654 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 655 } 656 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 657 for (i=0;i<np;i++) pvals[i] = 1.; 658 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 659 if (np) { 660 const PetscInt *pidxs; 661 662 if (zerodiag) { 663 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 664 } else if (pressures) { 665 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 666 } 667 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 668 if (zerodiag) { 669 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 670 } else if (pressures) { 671 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 672 } 673 } 674 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 675 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 676 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 677 ierr = PetscFree(pvals);CHKERRQ(ierr); 678 679 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 680 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 681 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 682 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 683 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 684 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 685 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 686 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 687 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 688 for (i=0;i<matis->sf_nleaves;i++) { 689 if (PetscGlobalRank != matis->sf_leafdata[i]) { 690 pvals[i] = 0.; 691 } 692 } 693 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 694 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 695 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 696 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 697 ierr = PetscFree(dummyins);CHKERRQ(ierr); 698 699 /* assembly quadrature vec and attach near null space to pmat */ 700 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 701 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 702 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 703 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 704 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 705 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 706 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 707 } 708 709 /* change of basis and p0 dofs */ 710 if (has_null_pressures) { 711 IS zerodiagc; 712 const PetscInt *idxs,*idxsc; 713 PetscInt i,s,*nnz; 714 715 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 716 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 717 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 718 /* local change of basis for pressures */ 719 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 720 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 721 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 722 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 723 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 724 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 725 for (i=0;i<pcbddc->benign_n;i++) { 726 PetscInt nzs,j; 727 728 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 729 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 730 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 731 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 732 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 733 } 734 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 735 ierr = PetscFree(nnz);CHKERRQ(ierr); 736 /* set identity on velocities */ 737 for (i=0;i<n-nz;i++) { 738 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 741 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 742 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 743 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 744 /* set change on pressures */ 745 for (s=0;s<pcbddc->benign_n;s++) { 746 PetscScalar *array; 747 PetscInt nzs; 748 749 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 750 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 751 for (i=0;i<nzs-1;i++) { 752 PetscScalar vals[2]; 753 PetscInt cols[2]; 754 755 cols[0] = idxs[i]; 756 cols[1] = idxs[nzs-1]; 757 vals[0] = 1.; 758 vals[1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 760 } 761 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 762 for (i=0;i<nzs-1;i++) array[i] = -1.; 763 array[nzs-1] = 1.; 764 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 765 /* store local idxs for p0 */ 766 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 767 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 768 ierr = PetscFree(array);CHKERRQ(ierr); 769 } 770 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 771 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 772 /* project if needed */ 773 if (pcbddc->benign_change_explicit) { 774 Mat M; 775 776 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 778 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatDestroy(&M);CHKERRQ(ierr); 780 } 781 /* store global idxs for p0 */ 782 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 783 } 784 pcbddc->benign_zerodiag_subs = zerodiag_subs; 785 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 786 787 /* determines if the coarse solver will be singular or not */ 788 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 789 *zerodiaglocal = zerodiag; 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 795 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 796 { 797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 798 PetscErrorCode ierr; 799 800 PetscFunctionBegin; 801 if (!pcbddc->benign_sf) { 802 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 803 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 804 } 805 if (get) { /* use SF to get values */ 806 PetscScalar *array; 807 808 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 809 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 810 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 811 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 813 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 814 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 815 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 822 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 823 { 824 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 825 PetscErrorCode ierr; 826 827 PetscFunctionBegin; 828 /* TODO: add error checking 829 - avoid nested pop (or push) calls. 830 - cannot push before pop. 831 - cannot call this if pcbddc->local_mat is NULL 832 */ 833 if (!pcbddc->benign_n) { 834 PetscFunctionReturn(0); 835 } 836 if (pop) { 837 if (pcbddc->benign_change_explicit) { 838 IS is_p0; 839 MatReuse reuse; 840 841 /* extract B_0 */ 842 reuse = MAT_INITIAL_MATRIX; 843 if (pcbddc->benign_B0) { 844 reuse = MAT_REUSE_MATRIX; 845 } 846 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 847 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 848 /* remove rows and cols from local problem */ 849 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 851 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 852 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 853 } else { 854 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 855 PetscScalar *vals; 856 PetscInt i,n,*idxs_ins; 857 858 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 859 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 860 if (!pcbddc->benign_B0) { 861 PetscInt *nnz; 862 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 863 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 864 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 865 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 866 for (i=0;i<pcbddc->benign_n;i++) { 867 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 868 nnz[i] = n - nnz[i]; 869 } 870 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 871 ierr = PetscFree(nnz);CHKERRQ(ierr); 872 } 873 874 for (i=0;i<pcbddc->benign_n;i++) { 875 PetscScalar *array; 876 PetscInt *idxs,j,nz,cum; 877 878 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 879 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 880 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 881 for (j=0;j<nz;j++) vals[j] = 1.; 882 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 883 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 884 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 885 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 886 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 887 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 888 cum = 0; 889 for (j=0;j<n;j++) { 890 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 891 vals[cum] = array[j]; 892 idxs_ins[cum] = j; 893 cum++; 894 } 895 } 896 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 897 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 898 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 899 } 900 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 901 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 902 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 903 } 904 } else { /* push */ 905 if (pcbddc->benign_change_explicit) { 906 PetscInt i; 907 908 for (i=0;i<pcbddc->benign_n;i++) { 909 PetscScalar *B0_vals; 910 PetscInt *B0_cols,B0_ncol; 911 912 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 913 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 914 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 915 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 916 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 917 } 918 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 } else { 921 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 922 } 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "PCBDDCAdaptiveSelection" 929 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 930 { 931 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 932 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 933 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 934 PetscBLASInt *B_iwork,*B_ifail; 935 PetscScalar *work,lwork; 936 PetscScalar *St,*S,*eigv; 937 PetscScalar *Sarray,*Starray; 938 PetscReal *eigs,thresh; 939 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 940 PetscBool allocated_S_St; 941 #if defined(PETSC_USE_COMPLEX) 942 PetscReal *rwork; 943 #endif 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 948 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 949 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 955 } 956 957 if (pcbddc->dbg_flag) { 958 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 959 } 960 961 /* max size of subsets */ 962 mss = 0; 963 for (i=0;i<sub_schurs->n_subs;i++) { 964 PetscInt subset_size; 965 966 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 967 mss = PetscMax(mss,subset_size); 968 } 969 970 /* min/max and threshold */ 971 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 972 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 973 nmax = PetscMax(nmin,nmax); 974 allocated_S_St = PETSC_FALSE; 975 if (nmin) { 976 allocated_S_St = PETSC_TRUE; 977 } 978 979 /* allocate lapack workspace */ 980 cum = cum2 = 0; 981 maxneigs = 0; 982 for (i=0;i<sub_schurs->n_subs;i++) { 983 PetscInt n,subset_size; 984 985 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 986 n = PetscMin(subset_size,nmax); 987 cum += subset_size; 988 cum2 += subset_size*n; 989 maxneigs = PetscMax(maxneigs,n); 990 } 991 if (mss) { 992 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 993 PetscBLASInt B_itype = 1; 994 PetscBLASInt B_N = mss; 995 PetscReal zero = 0.0; 996 PetscReal eps = 0.0; /* dlamch? */ 997 998 B_lwork = -1; 999 S = NULL; 1000 St = NULL; 1001 eigs = NULL; 1002 eigv = NULL; 1003 B_iwork = NULL; 1004 B_ifail = NULL; 1005 #if defined(PETSC_USE_COMPLEX) 1006 rwork = NULL; 1007 #endif 1008 thresh = 1.0; 1009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1010 #if defined(PETSC_USE_COMPLEX) 1011 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1012 #else 1013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1014 #endif 1015 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1016 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1017 } else { 1018 /* TODO */ 1019 } 1020 } else { 1021 lwork = 0; 1022 } 1023 1024 nv = 0; 1025 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1026 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1027 } 1028 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1029 if (allocated_S_St) { 1030 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1031 } 1032 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_COMPLEX) 1034 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1035 #endif 1036 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1037 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1039 nv+cum,&pcbddc->adaptive_constraints_idxs, 1040 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1041 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1042 1043 maxneigs = 0; 1044 cum = cumarray = 0; 1045 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1046 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1047 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1048 const PetscInt *idxs; 1049 1050 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1051 for (cum=0;cum<nv;cum++) { 1052 pcbddc->adaptive_constraints_n[cum] = 1; 1053 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1054 pcbddc->adaptive_constraints_data[cum] = 1.0; 1055 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1056 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1057 } 1058 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1059 } 1060 1061 if (mss) { /* multilevel */ 1062 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1064 } 1065 1066 thresh = pcbddc->adaptive_threshold; 1067 for (i=0;i<sub_schurs->n_subs;i++) { 1068 const PetscInt *idxs; 1069 PetscReal upper,lower; 1070 PetscInt j,subset_size,eigs_start = 0; 1071 PetscBLASInt B_N; 1072 PetscBool same_data = PETSC_FALSE; 1073 1074 if (pcbddc->use_deluxe_scaling) { 1075 upper = PETSC_MAX_REAL; 1076 lower = thresh; 1077 } else { 1078 upper = 1./thresh; 1079 lower = 0.; 1080 } 1081 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1082 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1083 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1084 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1085 if (sub_schurs->is_hermitian) { 1086 PetscInt j,k; 1087 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1088 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1089 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1090 } 1091 for (j=0;j<subset_size;j++) { 1092 for (k=j;k<subset_size;k++) { 1093 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1094 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1095 } 1096 } 1097 } else { 1098 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1099 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 } 1101 } else { 1102 S = Sarray + cumarray; 1103 St = Starray + cumarray; 1104 } 1105 /* see if we can save some work */ 1106 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1107 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1108 } 1109 1110 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1111 B_neigs = 0; 1112 } else { 1113 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1114 PetscBLASInt B_itype = 1; 1115 PetscBLASInt B_IL, B_IU; 1116 PetscReal eps = -1.0; /* dlamch? */ 1117 PetscInt nmin_s; 1118 PetscBool compute_range = PETSC_FALSE; 1119 1120 if (pcbddc->dbg_flag) { 1121 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1122 } 1123 1124 compute_range = PETSC_FALSE; 1125 if (thresh > 1.+PETSC_SMALL && !same_data) { 1126 compute_range = PETSC_TRUE; 1127 } 1128 1129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1130 if (compute_range) { 1131 1132 /* ask for eigenvalues larger than thresh */ 1133 #if defined(PETSC_USE_COMPLEX) 1134 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1135 #else 1136 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1137 #endif 1138 } else if (!same_data) { 1139 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1140 B_IL = 1; 1141 #if defined(PETSC_USE_COMPLEX) 1142 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1143 #else 1144 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1145 #endif 1146 } else { /* same_data is true, so get the adaptive function requested by the user */ 1147 PetscInt k; 1148 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1149 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1150 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1151 nmin = nmax; 1152 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1153 for (k=0;k<nmax;k++) { 1154 eigs[k] = 1./PETSC_SMALL; 1155 eigv[k*(subset_size+1)] = 1.0; 1156 } 1157 } 1158 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1159 if (B_ierr) { 1160 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1161 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1162 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1163 } 1164 1165 if (B_neigs > nmax) { 1166 if (pcbddc->dbg_flag) { 1167 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1168 } 1169 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1170 B_neigs = nmax; 1171 } 1172 1173 nmin_s = PetscMin(nmin,B_N); 1174 if (B_neigs < nmin_s) { 1175 PetscBLASInt B_neigs2; 1176 1177 if (pcbddc->use_deluxe_scaling) { 1178 B_IL = B_N - nmin_s + 1; 1179 B_IU = B_N - B_neigs; 1180 } else { 1181 B_IL = B_neigs + 1; 1182 B_IU = nmin_s; 1183 } 1184 if (pcbddc->dbg_flag) { 1185 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1186 } 1187 if (sub_schurs->is_hermitian) { 1188 PetscInt j; 1189 for (j=0;j<subset_size;j++) { 1190 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 for (j=0;j<subset_size;j++) { 1193 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1194 } 1195 } else { 1196 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1197 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1198 } 1199 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1200 #if defined(PETSC_USE_COMPLEX) 1201 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1202 #else 1203 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1204 #endif 1205 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1206 B_neigs += B_neigs2; 1207 } 1208 if (B_ierr) { 1209 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1210 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1211 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1212 } 1213 if (pcbddc->dbg_flag) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1215 for (j=0;j<B_neigs;j++) { 1216 if (eigs[j] == 0.0) { 1217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1218 } else { 1219 if (pcbddc->use_deluxe_scaling) { 1220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1221 } else { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1223 } 1224 } 1225 } 1226 } 1227 } else { 1228 /* TODO */ 1229 } 1230 } 1231 /* change the basis back to the original one */ 1232 if (sub_schurs->change) { 1233 Mat change,phi,phit; 1234 1235 if (pcbddc->dbg_flag > 1) { 1236 PetscInt ii; 1237 for (ii=0;ii<B_neigs;ii++) { 1238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1239 for (j=0;j<B_N;j++) { 1240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1245 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1246 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1247 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1248 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1249 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1250 } 1251 maxneigs = PetscMax(B_neigs,maxneigs); 1252 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1253 if (B_neigs) { 1254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1255 1256 if (pcbddc->dbg_flag > 1) { 1257 PetscInt ii; 1258 for (ii=0;ii<B_neigs;ii++) { 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1260 for (j=0;j<B_N;j++) { 1261 #if defined(PETSC_USE_COMPLEX) 1262 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1263 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1267 #endif 1268 } 1269 } 1270 } 1271 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1272 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1273 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1274 cum++; 1275 } 1276 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1277 /* shift for next computation */ 1278 cumarray += subset_size*subset_size; 1279 } 1280 if (pcbddc->dbg_flag) { 1281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1282 } 1283 1284 if (mss) { 1285 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1287 /* destroy matrices (junk) */ 1288 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1289 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1290 } 1291 if (allocated_S_St) { 1292 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1293 } 1294 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1295 #if defined(PETSC_USE_COMPLEX) 1296 ierr = PetscFree(rwork);CHKERRQ(ierr); 1297 #endif 1298 if (pcbddc->dbg_flag) { 1299 PetscInt maxneigs_r; 1300 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1301 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "PCBDDCSetUpSolvers" 1308 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1309 { 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PetscScalar *coarse_submat_vals; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1316 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1317 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1318 1319 /* Setup local neumann solver ksp_R */ 1320 /* PCBDDCSetUpLocalScatters should be called first! */ 1321 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1322 1323 /* Change global null space passed in by the user if change of basis has been requested */ 1324 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1325 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1326 } 1327 1328 /* 1329 Setup local correction and local part of coarse basis. 1330 Gives back the dense local part of the coarse matrix in column major ordering 1331 */ 1332 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1333 1334 /* Compute total number of coarse nodes and setup coarse solver */ 1335 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1336 1337 /* free */ 1338 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "PCBDDCResetCustomization" 1344 PetscErrorCode PCBDDCResetCustomization(PC pc) 1345 { 1346 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1351 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1353 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1354 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1357 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1358 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1359 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1360 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1361 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "PCBDDCResetTopography" 1367 PetscErrorCode PCBDDCResetTopography(PC pc) 1368 { 1369 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1370 PetscInt i; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1375 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1376 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1377 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1379 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1380 for (i=0;i<pcbddc->n_local_subs;i++) { 1381 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1384 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCResetSolvers" 1390 PetscErrorCode PCBDDCResetSolvers(PC pc) 1391 { 1392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1397 if (pcbddc->coarse_phi_B) { 1398 PetscScalar *array; 1399 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1400 ierr = PetscFree(array);CHKERRQ(ierr); 1401 } 1402 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1403 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1404 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1405 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1406 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1407 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1410 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1412 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1413 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1414 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1415 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1416 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1417 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1418 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1420 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1421 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1425 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1426 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1427 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1429 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1430 if (pcbddc->benign_zerodiag_subs) { 1431 PetscInt i; 1432 for (i=0;i<pcbddc->benign_n;i++) { 1433 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1436 } 1437 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1443 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1444 { 1445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1446 PC_IS *pcis = (PC_IS*)pc->data; 1447 VecType impVecType; 1448 PetscInt n_constraints,n_R,old_size; 1449 PetscErrorCode ierr; 1450 1451 PetscFunctionBegin; 1452 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1453 /* get sizes */ 1454 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1455 n_R = pcis->n - pcbddc->n_vertices; 1456 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1457 /* local work vectors (try to avoid unneeded work)*/ 1458 /* R nodes */ 1459 old_size = -1; 1460 if (pcbddc->vec1_R) { 1461 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1462 } 1463 if (n_R != old_size) { 1464 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1465 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1467 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1468 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1469 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1470 } 1471 /* local primal dofs */ 1472 old_size = -1; 1473 if (pcbddc->vec1_P) { 1474 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1475 } 1476 if (pcbddc->local_primal_size != old_size) { 1477 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1479 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1480 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1481 } 1482 /* local explicit constraints */ 1483 old_size = -1; 1484 if (pcbddc->vec1_C) { 1485 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1486 } 1487 if (n_constraints && n_constraints != old_size) { 1488 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1489 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1490 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1491 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "PCBDDCSetUpCorrection" 1498 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1499 { 1500 PetscErrorCode ierr; 1501 /* pointers to pcis and pcbddc */ 1502 PC_IS* pcis = (PC_IS*)pc->data; 1503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1504 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1505 /* submatrices of local problem */ 1506 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1507 /* submatrices of local coarse problem */ 1508 Mat S_VV,S_CV,S_VC,S_CC; 1509 /* working matrices */ 1510 Mat C_CR; 1511 /* additional working stuff */ 1512 PC pc_R; 1513 Mat F; 1514 Vec dummy_vec; 1515 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1516 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1517 PetscScalar *work; 1518 PetscInt *idx_V_B; 1519 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1520 PetscInt i,n_R,n_D,n_B; 1521 1522 /* some shortcuts to scalars */ 1523 PetscScalar one=1.0,m_one=-1.0; 1524 1525 PetscFunctionBegin; 1526 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1527 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1528 } 1529 1530 /* Set Non-overlapping dimensions */ 1531 n_vertices = pcbddc->n_vertices; 1532 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1533 n_B = pcis->n_B; 1534 n_D = pcis->n - n_B; 1535 n_R = pcis->n - n_vertices; 1536 1537 /* vertices in boundary numbering */ 1538 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1539 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1540 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1541 1542 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1543 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1544 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1545 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1547 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1548 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1549 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1550 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1551 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1552 1553 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1554 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1555 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1558 lda_rhs = n_R; 1559 need_benign_correction = PETSC_FALSE; 1560 if (isLU || isILU || isCHOL) { 1561 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1562 } else if (sub_schurs->reuse_solver) { 1563 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1564 MatFactorType type; 1565 1566 F = reuse_solver->F; 1567 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1568 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1569 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1570 need_benign_correction = !!reuse_solver->benign_n; 1571 } else { 1572 F = NULL; 1573 } 1574 1575 /* allocate workspace */ 1576 n = 0; 1577 if (n_constraints) { 1578 n += lda_rhs*n_constraints; 1579 } 1580 if (n_vertices) { 1581 n = PetscMax(2*lda_rhs*n_vertices,n); 1582 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1583 } 1584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1585 1586 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1587 dummy_vec = NULL; 1588 if (need_benign_correction && lda_rhs != n_R && F) { 1589 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1590 } 1591 1592 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1593 if (n_constraints) { 1594 Mat M1,M2,M3,C_B; 1595 IS is_aux; 1596 PetscScalar *array,*array2; 1597 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1600 1601 /* Extract constraints on R nodes: C_{CR} */ 1602 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1605 1606 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1607 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1608 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1609 for (i=0;i<n_constraints;i++) { 1610 const PetscScalar *row_cmat_values; 1611 const PetscInt *row_cmat_indices; 1612 PetscInt size_of_constraint,j; 1613 1614 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1615 for (j=0;j<size_of_constraint;j++) { 1616 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1617 } 1618 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1619 } 1620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1621 if (F) { 1622 Mat B; 1623 1624 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1625 if (need_benign_correction) { 1626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1627 1628 /* rhs is already zero on interior dofs, no need to change the rhs */ 1629 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1630 } 1631 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1632 if (need_benign_correction) { 1633 PetscScalar *marr; 1634 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1635 1636 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1637 if (lda_rhs != n_R) { 1638 for (i=0;i<n_constraints;i++) { 1639 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1640 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1641 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1642 } 1643 } else { 1644 for (i=0;i<n_constraints;i++) { 1645 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1646 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1647 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1648 } 1649 } 1650 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1651 } 1652 ierr = MatDestroy(&B);CHKERRQ(ierr); 1653 } else { 1654 PetscScalar *marr; 1655 1656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1657 for (i=0;i<n_constraints;i++) { 1658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1663 } 1664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1665 } 1666 if (!pcbddc->switch_static) { 1667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1670 for (i=0;i<n_constraints;i++) { 1671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1677 } 1678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1680 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1681 } else { 1682 if (lda_rhs != n_R) { 1683 IS dummy; 1684 1685 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1686 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1687 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1690 pcbddc->local_auxmat2 = local_auxmat2_R; 1691 } 1692 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1693 } 1694 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1695 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1696 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1697 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1698 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1699 if (isCHOL) { 1700 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1701 } else { 1702 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1703 } 1704 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1705 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1706 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1707 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1708 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1709 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1710 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1712 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1713 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1714 } 1715 1716 /* Get submatrices from subdomain matrix */ 1717 if (n_vertices) { 1718 IS is_aux; 1719 1720 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1721 IS tis; 1722 1723 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1724 ierr = ISSort(tis);CHKERRQ(ierr); 1725 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1726 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1727 } else { 1728 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 } 1730 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1732 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1733 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1734 } 1735 1736 /* Matrix of coarse basis functions (local) */ 1737 if (pcbddc->coarse_phi_B) { 1738 PetscInt on_B,on_primal,on_D=n_D; 1739 if (pcbddc->coarse_phi_D) { 1740 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1741 } 1742 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1743 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1744 PetscScalar *marray; 1745 1746 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1747 ierr = PetscFree(marray);CHKERRQ(ierr); 1748 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1749 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1750 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1752 } 1753 } 1754 1755 if (!pcbddc->coarse_phi_B) { 1756 PetscScalar *marray; 1757 1758 n = n_B*pcbddc->local_primal_size; 1759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1760 n += n_D*pcbddc->local_primal_size; 1761 } 1762 if (!pcbddc->symmetric_primal) { 1763 n *= 2; 1764 } 1765 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1767 n = n_B*pcbddc->local_primal_size; 1768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1770 n += n_D*pcbddc->local_primal_size; 1771 } 1772 if (!pcbddc->symmetric_primal) { 1773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1775 n = n_B*pcbddc->local_primal_size; 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1777 } 1778 } else { 1779 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1780 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1784 } 1785 } 1786 } 1787 1788 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1789 p0_lidx_I = NULL; 1790 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1791 const PetscInt *idxs; 1792 1793 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1795 for (i=0;i<pcbddc->benign_n;i++) { 1796 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1797 } 1798 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1799 } 1800 1801 /* vertices */ 1802 if (n_vertices) { 1803 1804 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1805 1806 if (n_R) { 1807 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1808 PetscBLASInt B_N,B_one = 1; 1809 PetscScalar *x,*y; 1810 PetscBool isseqaij; 1811 1812 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 ISLocalToGlobalMapping RtoN; 1815 IS is_p0; 1816 PetscInt *idxs_p0,n; 1817 1818 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1819 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1820 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1821 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1822 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1823 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1824 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1825 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1826 } 1827 1828 if (lda_rhs == n_R) { 1829 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1830 } else { 1831 PetscScalar *av,*array; 1832 const PetscInt *xadj,*adjncy; 1833 PetscInt n; 1834 PetscBool flg_row; 1835 1836 array = work+lda_rhs*n_vertices; 1837 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1838 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1839 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1840 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1841 for (i=0;i<n;i++) { 1842 PetscInt j; 1843 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1844 } 1845 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1848 } 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1850 if (need_benign_correction) { 1851 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1852 PetscScalar *marr; 1853 1854 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1855 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1856 1857 | 0 0 0 | (V) 1858 L = | 0 0 -1 | (P-p0) 1859 | 0 0 -1 | (p0) 1860 1861 */ 1862 for (i=0;i<reuse_solver->benign_n;i++) { 1863 const PetscScalar *vals; 1864 const PetscInt *idxs,*idxs_zero; 1865 PetscInt n,j,nz; 1866 1867 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1868 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1869 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1870 for (j=0;j<n;j++) { 1871 PetscScalar val = vals[j]; 1872 PetscInt k,col = idxs[j]; 1873 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1874 } 1875 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1876 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1877 } 1878 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1879 } 1880 if (F) { 1881 /* need to correct the rhs */ 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 PetscScalar *marr; 1885 1886 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1887 if (lda_rhs != n_R) { 1888 for (i=0;i<n_vertices;i++) { 1889 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1890 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1891 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1892 } 1893 } else { 1894 for (i=0;i<n_vertices;i++) { 1895 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 } 1899 } 1900 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1901 } 1902 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1903 /* need to correct the solution */ 1904 if (need_benign_correction) { 1905 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1906 PetscScalar *marr; 1907 1908 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1909 if (lda_rhs != n_R) { 1910 for (i=0;i<n_vertices;i++) { 1911 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1912 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1913 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1914 } 1915 } else { 1916 for (i=0;i<n_vertices;i++) { 1917 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1918 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1920 } 1921 } 1922 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1923 } 1924 } else { 1925 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1926 for (i=0;i<n_vertices;i++) { 1927 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1928 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1931 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1932 } 1933 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1934 } 1935 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1936 /* S_VV and S_CV */ 1937 if (n_constraints) { 1938 Mat B; 1939 1940 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1941 for (i=0;i<n_vertices;i++) { 1942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1944 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1945 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1946 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1950 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1951 ierr = MatDestroy(&B);CHKERRQ(ierr); 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1954 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1956 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1957 ierr = MatDestroy(&B);CHKERRQ(ierr); 1958 } 1959 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1960 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1961 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1962 } 1963 if (lda_rhs != n_R) { 1964 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1966 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1967 } 1968 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1969 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1970 if (need_benign_correction) { 1971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1972 PetscScalar *marr,*sums; 1973 1974 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1975 ierr = MatDenseGetArray(S_VVt,&marr); 1976 for (i=0;i<reuse_solver->benign_n;i++) { 1977 const PetscScalar *vals; 1978 const PetscInt *idxs,*idxs_zero; 1979 PetscInt n,j,nz; 1980 1981 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1982 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1983 for (j=0;j<n_vertices;j++) { 1984 PetscInt k; 1985 sums[j] = 0.; 1986 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1987 } 1988 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1989 for (j=0;j<n;j++) { 1990 PetscScalar val = vals[j]; 1991 PetscInt k; 1992 for (k=0;k<n_vertices;k++) { 1993 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1994 } 1995 } 1996 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1997 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1998 } 1999 ierr = PetscFree(sums);CHKERRQ(ierr); 2000 ierr = MatDenseRestoreArray(S_VVt,&marr); 2001 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2002 } 2003 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2005 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2006 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2007 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2008 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2010 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2011 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2012 } else { 2013 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 } 2015 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2016 2017 /* coarse basis functions */ 2018 for (i=0;i<n_vertices;i++) { 2019 PetscScalar *y; 2020 2021 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2022 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2023 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 y[n_B*i+idx_V_B[i]] = 1.0; 2027 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2028 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2029 2030 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2031 PetscInt j; 2032 2033 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2034 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2035 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2036 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2037 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2038 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2039 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2040 } 2041 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2042 } 2043 /* if n_R == 0 the object is not destroyed */ 2044 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2045 } 2046 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2047 2048 if (n_constraints) { 2049 Mat B; 2050 2051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2052 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2053 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2054 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2055 if (n_vertices) { 2056 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2057 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2058 } else { 2059 Mat S_VCt; 2060 2061 if (lda_rhs != n_R) { 2062 ierr = MatDestroy(&B);CHKERRQ(ierr); 2063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2064 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2065 } 2066 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2067 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2068 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2069 } 2070 } 2071 ierr = MatDestroy(&B);CHKERRQ(ierr); 2072 /* coarse basis functions */ 2073 for (i=0;i<n_constraints;i++) { 2074 PetscScalar *y; 2075 2076 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2078 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2084 PetscInt j; 2085 2086 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2087 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2088 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2091 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2092 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2093 } 2094 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2095 } 2096 } 2097 if (n_constraints) { 2098 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2101 2102 /* coarse matrix entries relative to B_0 */ 2103 if (pcbddc->benign_n) { 2104 Mat B0_B,B0_BPHI; 2105 IS is_dummy; 2106 PetscScalar *data; 2107 PetscInt j; 2108 2109 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2110 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2111 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2112 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2113 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2114 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2115 for (j=0;j<pcbddc->benign_n;j++) { 2116 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2117 for (i=0;i<pcbddc->local_primal_size;i++) { 2118 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2119 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2120 } 2121 } 2122 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2123 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2124 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2125 } 2126 2127 /* compute other basis functions for non-symmetric problems */ 2128 if (!pcbddc->symmetric_primal) { 2129 Mat B_V=NULL,B_C=NULL; 2130 PetscScalar *marray; 2131 2132 if (n_constraints) { 2133 Mat S_CCT,C_CRT; 2134 2135 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2136 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2137 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2138 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2139 if (n_vertices) { 2140 Mat S_VCT; 2141 2142 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2143 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2144 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2145 } 2146 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2147 } 2148 if (n_vertices && n_R) { 2149 PetscScalar *av,*marray; 2150 const PetscInt *xadj,*adjncy; 2151 PetscInt n; 2152 PetscBool flg_row; 2153 2154 /* B_V = B_V - A_VR^T */ 2155 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2156 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2157 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2159 for (i=0;i<n;i++) { 2160 PetscInt j; 2161 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2162 } 2163 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2164 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2165 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2166 } 2167 2168 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2169 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2170 for (i=0;i<n_vertices;i++) { 2171 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2172 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2173 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2174 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2175 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2176 } 2177 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2178 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2179 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2180 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2181 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2182 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2183 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2184 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2185 } 2186 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2187 /* coarse basis functions */ 2188 for (i=0;i<pcbddc->local_primal_size;i++) { 2189 PetscScalar *y; 2190 2191 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2192 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2193 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2194 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 if (i<n_vertices) { 2197 y[n_B*i+idx_V_B[i]] = 1.0; 2198 } 2199 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2200 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2201 2202 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2203 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2204 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2205 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2206 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2207 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2208 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2209 } 2210 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2211 } 2212 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2213 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2214 } 2215 /* free memory */ 2216 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2218 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2219 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2221 ierr = PetscFree(work);CHKERRQ(ierr); 2222 if (n_vertices) { 2223 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2224 } 2225 if (n_constraints) { 2226 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2227 } 2228 /* Checking coarse_sub_mat and coarse basis functios */ 2229 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2230 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2231 if (pcbddc->dbg_flag) { 2232 Mat coarse_sub_mat; 2233 Mat AUXMAT,TM1,TM2,TM3,TM4; 2234 Mat coarse_phi_D,coarse_phi_B; 2235 Mat coarse_psi_D,coarse_psi_B; 2236 Mat A_II,A_BB,A_IB,A_BI; 2237 Mat C_B,CPHI; 2238 IS is_dummy; 2239 Vec mones; 2240 MatType checkmattype=MATSEQAIJ; 2241 PetscReal real_value; 2242 2243 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2244 Mat A; 2245 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2247 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2250 ierr = MatDestroy(&A);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2253 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2254 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2255 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2256 } 2257 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2258 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2259 if (!pcbddc->symmetric_primal) { 2260 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2262 } 2263 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2264 2265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2268 if (!pcbddc->symmetric_primal) { 2269 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2271 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 } else { 2282 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2283 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2284 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2285 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 } 2291 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2292 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2293 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2294 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2295 if (pcbddc->benign_n) { 2296 Mat B0_B,B0_BPHI; 2297 PetscScalar *data,*data2; 2298 PetscInt j; 2299 2300 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2301 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2302 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2303 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2304 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2306 for (j=0;j<pcbddc->benign_n;j++) { 2307 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2308 for (i=0;i<pcbddc->local_primal_size;i++) { 2309 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2310 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2311 } 2312 } 2313 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2314 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2315 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2316 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2317 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2318 } 2319 #if 0 2320 { 2321 PetscViewer viewer; 2322 char filename[256]; 2323 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2324 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2325 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2326 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2327 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2328 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2329 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2330 if (save_change) { 2331 Mat phi_B; 2332 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2333 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2334 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2335 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2336 } else { 2337 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2338 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2339 } 2340 if (pcbddc->coarse_phi_D) { 2341 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2342 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2343 } 2344 if (pcbddc->coarse_psi_B) { 2345 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2346 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2347 } 2348 if (pcbddc->coarse_psi_D) { 2349 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2350 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2351 } 2352 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2353 } 2354 #endif 2355 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2356 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2357 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2359 2360 /* check constraints */ 2361 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2362 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2363 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2364 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2365 } else { 2366 PetscScalar *data; 2367 Mat tmat; 2368 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2369 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2370 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2371 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2372 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2373 } 2374 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2375 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2376 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2377 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2379 if (!pcbddc->symmetric_primal) { 2380 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2381 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2382 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2383 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2385 } 2386 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2387 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2388 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2389 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2391 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2392 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2393 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2395 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2396 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2397 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2399 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2400 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2401 if (!pcbddc->symmetric_primal) { 2402 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2404 } 2405 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2406 } 2407 /* get back data */ 2408 *coarse_submat_vals_n = coarse_submat_vals; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2414 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2415 { 2416 Mat *work_mat; 2417 IS isrow_s,iscol_s; 2418 PetscBool rsorted,csorted; 2419 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2424 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2425 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2426 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2427 2428 if (!rsorted) { 2429 const PetscInt *idxs; 2430 PetscInt *idxs_sorted,i; 2431 2432 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2433 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2434 for (i=0;i<rsize;i++) { 2435 idxs_perm_r[i] = i; 2436 } 2437 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2438 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2439 for (i=0;i<rsize;i++) { 2440 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2441 } 2442 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2443 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2444 } else { 2445 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2446 isrow_s = isrow; 2447 } 2448 2449 if (!csorted) { 2450 if (isrow == iscol) { 2451 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2452 iscol_s = isrow_s; 2453 } else { 2454 const PetscInt *idxs; 2455 PetscInt *idxs_sorted,i; 2456 2457 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2458 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_perm_c[i] = i; 2461 } 2462 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2463 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2464 for (i=0;i<csize;i++) { 2465 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2466 } 2467 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2468 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2469 } 2470 } else { 2471 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2472 iscol_s = iscol; 2473 } 2474 2475 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2476 2477 if (!rsorted || !csorted) { 2478 Mat new_mat; 2479 IS is_perm_r,is_perm_c; 2480 2481 if (!rsorted) { 2482 PetscInt *idxs_r,i; 2483 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2484 for (i=0;i<rsize;i++) { 2485 idxs_r[idxs_perm_r[i]] = i; 2486 } 2487 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2488 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2489 } else { 2490 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2491 } 2492 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2493 2494 if (!csorted) { 2495 if (isrow_s == iscol_s) { 2496 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2497 is_perm_c = is_perm_r; 2498 } else { 2499 PetscInt *idxs_c,i; 2500 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2501 for (i=0;i<csize;i++) { 2502 idxs_c[idxs_perm_c[i]] = i; 2503 } 2504 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2505 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2506 } 2507 } else { 2508 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2511 2512 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2513 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2514 work_mat[0] = new_mat; 2515 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2516 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2517 } 2518 2519 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2520 *B = work_mat[0]; 2521 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2522 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2523 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2529 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2530 { 2531 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2532 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2533 Mat new_mat; 2534 IS is_local,is_global; 2535 PetscInt local_size; 2536 PetscBool isseqaij; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2541 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2542 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2543 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2544 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2545 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2546 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2547 2548 /* check */ 2549 if (pcbddc->dbg_flag) { 2550 Vec x,x_change; 2551 PetscReal error; 2552 2553 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2554 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2555 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2556 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2557 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2558 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2561 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2562 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2565 ierr = VecDestroy(&x);CHKERRQ(ierr); 2566 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2567 } 2568 2569 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2570 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2571 if (isseqaij) { 2572 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2573 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2574 } else { 2575 Mat work_mat; 2576 2577 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2578 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2579 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2580 } 2581 if (matis->A->symmetric_set) { 2582 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2583 #if !defined(PETSC_USE_COMPLEX) 2584 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2585 #endif 2586 } 2587 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2593 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2594 { 2595 PC_IS* pcis = (PC_IS*)(pc->data); 2596 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2597 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2598 PetscInt *idx_R_local=NULL; 2599 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2600 PetscInt vbs,bs; 2601 PetscBT bitmask=NULL; 2602 PetscErrorCode ierr; 2603 2604 PetscFunctionBegin; 2605 /* 2606 No need to setup local scatters if 2607 - primal space is unchanged 2608 AND 2609 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2610 AND 2611 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2612 */ 2613 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2614 PetscFunctionReturn(0); 2615 } 2616 /* destroy old objects */ 2617 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2618 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2619 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2620 /* Set Non-overlapping dimensions */ 2621 n_B = pcis->n_B; 2622 n_D = pcis->n - n_B; 2623 n_vertices = pcbddc->n_vertices; 2624 2625 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2626 2627 /* create auxiliary bitmask and allocate workspace */ 2628 if (!sub_schurs->reuse_solver) { 2629 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2630 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2631 for (i=0;i<n_vertices;i++) { 2632 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2633 } 2634 2635 for (i=0, n_R=0; i<pcis->n; i++) { 2636 if (!PetscBTLookup(bitmask,i)) { 2637 idx_R_local[n_R++] = i; 2638 } 2639 } 2640 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2641 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2642 2643 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2644 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2645 } 2646 2647 /* Block code */ 2648 vbs = 1; 2649 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2650 if (bs>1 && !(n_vertices%bs)) { 2651 PetscBool is_blocked = PETSC_TRUE; 2652 PetscInt *vary; 2653 if (!sub_schurs->reuse_solver) { 2654 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2655 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2656 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2657 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2658 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2659 for (i=0; i<pcis->n/bs; i++) { 2660 if (vary[i]!=0 && vary[i]!=bs) { 2661 is_blocked = PETSC_FALSE; 2662 break; 2663 } 2664 } 2665 ierr = PetscFree(vary);CHKERRQ(ierr); 2666 } else { 2667 /* Verify directly the R set */ 2668 for (i=0; i<n_R/bs; i++) { 2669 PetscInt j,node=idx_R_local[bs*i]; 2670 for (j=1; j<bs; j++) { 2671 if (node != idx_R_local[bs*i+j]-j) { 2672 is_blocked = PETSC_FALSE; 2673 break; 2674 } 2675 } 2676 } 2677 } 2678 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2679 vbs = bs; 2680 for (i=0;i<n_R/vbs;i++) { 2681 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2682 } 2683 } 2684 } 2685 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2686 if (sub_schurs->reuse_solver) { 2687 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2688 2689 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2690 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2691 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2692 reuse_solver->is_R = pcbddc->is_R_local; 2693 } else { 2694 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2695 } 2696 2697 /* print some info if requested */ 2698 if (pcbddc->dbg_flag) { 2699 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2705 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2706 } 2707 2708 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2709 if (!sub_schurs->reuse_solver) { 2710 IS is_aux1,is_aux2; 2711 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2712 2713 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2714 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2715 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2716 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2717 for (i=0; i<n_D; i++) { 2718 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2719 } 2720 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2721 for (i=0, j=0; i<n_R; i++) { 2722 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2723 aux_array1[j++] = i; 2724 } 2725 } 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2727 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2728 for (i=0, j=0; i<n_B; i++) { 2729 if (!PetscBTLookup(bitmask,is_indices[i])) { 2730 aux_array2[j++] = i; 2731 } 2732 } 2733 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2734 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2735 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2737 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2738 2739 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2740 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2741 for (i=0, j=0; i<n_R; i++) { 2742 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2743 aux_array1[j++] = i; 2744 } 2745 } 2746 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2747 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2748 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2749 } 2750 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2751 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2752 } else { 2753 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2754 IS tis; 2755 PetscInt schur_size; 2756 2757 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2758 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2759 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2760 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2761 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2762 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2763 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2764 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2765 } 2766 } 2767 PetscFunctionReturn(0); 2768 } 2769 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2773 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2774 { 2775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2776 PC_IS *pcis = (PC_IS*)pc->data; 2777 PC pc_temp; 2778 Mat A_RR; 2779 MatReuse reuse; 2780 PetscScalar m_one = -1.0; 2781 PetscReal value; 2782 PetscInt n_D,n_R; 2783 PetscBool use_exact,use_exact_reduced,issbaij; 2784 PetscErrorCode ierr; 2785 /* prefixes stuff */ 2786 char dir_prefix[256],neu_prefix[256],str_level[16]; 2787 size_t len; 2788 2789 PetscFunctionBegin; 2790 2791 /* compute prefixes */ 2792 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2793 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2794 if (!pcbddc->current_level) { 2795 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2797 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2798 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2799 } else { 2800 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2801 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2802 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2803 len -= 15; /* remove "pc_bddc_coarse_" */ 2804 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2805 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2806 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2807 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2808 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2809 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2810 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2811 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2812 } 2813 2814 /* DIRICHLET PROBLEM */ 2815 if (dirichlet) { 2816 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2817 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2818 if (!sub_schurs->reuse_solver) { 2819 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2820 } 2821 if (pcbddc->dbg_flag) { 2822 Mat A_IIn; 2823 2824 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2825 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2826 pcis->A_II = A_IIn; 2827 } 2828 } 2829 if (pcbddc->local_mat->symmetric_set) { 2830 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2831 } 2832 /* Matrix for Dirichlet problem is pcis->A_II */ 2833 n_D = pcis->n - pcis->n_B; 2834 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2835 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2836 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2837 /* default */ 2838 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2839 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2840 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2841 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2842 if (issbaij) { 2843 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2844 } else { 2845 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2846 } 2847 /* Allow user's customization */ 2848 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2849 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2850 } 2851 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2852 if (sub_schurs->reuse_solver) { 2853 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2854 2855 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2856 } 2857 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2858 if (!n_D) { 2859 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2860 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2861 } 2862 /* Set Up KSP for Dirichlet problem of BDDC */ 2863 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2864 /* set ksp_D into pcis data */ 2865 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2866 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2867 pcis->ksp_D = pcbddc->ksp_D; 2868 } 2869 2870 /* NEUMANN PROBLEM */ 2871 A_RR = 0; 2872 if (neumann) { 2873 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2874 PetscInt ibs,mbs; 2875 PetscBool issbaij; 2876 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2877 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2878 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2879 if (pcbddc->ksp_R) { /* already created ksp */ 2880 PetscInt nn_R; 2881 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2882 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2883 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2884 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2885 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2886 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2887 reuse = MAT_INITIAL_MATRIX; 2888 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2889 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2890 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2891 reuse = MAT_INITIAL_MATRIX; 2892 } else { /* safe to reuse the matrix */ 2893 reuse = MAT_REUSE_MATRIX; 2894 } 2895 } 2896 /* last check */ 2897 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2898 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2899 reuse = MAT_INITIAL_MATRIX; 2900 } 2901 } else { /* first time, so we need to create the matrix */ 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2905 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2906 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2907 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2908 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2909 if (matis->A == pcbddc->local_mat) { 2910 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2911 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2912 } else { 2913 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2914 } 2915 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2916 if (matis->A == pcbddc->local_mat) { 2917 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2918 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2919 } else { 2920 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2921 } 2922 } 2923 /* extract A_RR */ 2924 if (sub_schurs->reuse_solver) { 2925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2926 2927 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2928 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2929 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2930 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2931 } else { 2932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2933 } 2934 } else { 2935 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2936 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2937 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2938 } 2939 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2940 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2941 } 2942 if (pcbddc->local_mat->symmetric_set) { 2943 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2944 } 2945 if (!pcbddc->ksp_R) { /* create object if not present */ 2946 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2947 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2948 /* default */ 2949 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2950 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2951 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2953 if (issbaij) { 2954 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2955 } else { 2956 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2957 } 2958 /* Allow user's customization */ 2959 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2960 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2961 } 2962 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2963 if (!n_R) { 2964 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2965 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2966 } 2967 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2968 /* Reuse solver if it is present */ 2969 if (sub_schurs->reuse_solver) { 2970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2971 2972 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2973 } 2974 /* Set Up KSP for Neumann problem of BDDC */ 2975 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2976 } 2977 2978 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2979 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2980 if (pcbddc->dbg_flag) { 2981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2982 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2984 } 2985 if (dirichlet) { /* Dirichlet */ 2986 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2987 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2988 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2989 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2990 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2991 /* need to be adapted? */ 2992 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2993 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2994 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2995 /* print info */ 2996 if (pcbddc->dbg_flag) { 2997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2999 } 3000 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3001 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3002 } 3003 } 3004 if (neumann) { /* Neumann */ 3005 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3006 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3007 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3008 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3009 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3010 /* need to be adapted? */ 3011 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3012 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3013 /* print info */ 3014 if (pcbddc->dbg_flag) { 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3017 } 3018 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3019 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3020 } 3021 } 3022 } 3023 /* free Neumann problem's matrix */ 3024 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3030 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3031 { 3032 PetscErrorCode ierr; 3033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3034 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3035 3036 PetscFunctionBegin; 3037 if (!sub_schurs->reuse_solver) { 3038 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3039 } 3040 if (!pcbddc->switch_static) { 3041 if (applytranspose && pcbddc->local_auxmat1) { 3042 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3043 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3044 } 3045 if (!sub_schurs->reuse_solver) { 3046 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 } else { 3049 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3050 3051 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3052 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 } 3054 } else { 3055 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3056 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3057 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 if (applytranspose && pcbddc->local_auxmat1) { 3060 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3061 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3064 } 3065 } 3066 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3067 if (applytranspose) { 3068 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3069 } else { 3070 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3071 } 3072 } else { 3073 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3074 3075 if (applytranspose) { 3076 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3077 } else { 3078 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3079 } 3080 } 3081 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3082 if (!pcbddc->switch_static) { 3083 if (!sub_schurs->reuse_solver) { 3084 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 } else { 3087 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3088 3089 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3091 } 3092 if (!applytranspose && pcbddc->local_auxmat1) { 3093 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3094 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3095 } 3096 } else { 3097 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 if (!applytranspose && pcbddc->local_auxmat1) { 3102 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3103 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3104 } 3105 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3108 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 } 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3116 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3117 { 3118 PetscErrorCode ierr; 3119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3120 PC_IS* pcis = (PC_IS*) (pc->data); 3121 const PetscScalar zero = 0.0; 3122 3123 PetscFunctionBegin; 3124 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3125 if (!pcbddc->benign_apply_coarse_only) { 3126 if (applytranspose) { 3127 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3128 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3129 } else { 3130 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } 3133 } else { 3134 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3135 } 3136 3137 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3138 if (pcbddc->benign_n) { 3139 PetscScalar *array; 3140 PetscInt j; 3141 3142 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3143 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3144 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3145 } 3146 3147 /* start communications from local primal nodes to rhs of coarse solver */ 3148 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3149 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3150 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 3152 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3153 /* TODO remove null space when doing multilevel */ 3154 if (pcbddc->coarse_ksp) { 3155 Mat coarse_mat; 3156 Vec rhs,sol; 3157 MatNullSpace nullsp; 3158 3159 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3160 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3161 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3162 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3163 if (nullsp) { 3164 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3165 } 3166 if (applytranspose) { 3167 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3168 } else { 3169 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3170 } 3171 if (nullsp) { 3172 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3173 } 3174 } 3175 3176 /* Local solution on R nodes */ 3177 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3178 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3179 } 3180 /* communications from coarse sol to local primal nodes */ 3181 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3183 3184 /* Sum contributions from the two levels */ 3185 if (!pcbddc->benign_apply_coarse_only) { 3186 if (applytranspose) { 3187 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3188 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3189 } else { 3190 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3191 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3192 } 3193 /* store p0 */ 3194 if (pcbddc->benign_n) { 3195 PetscScalar *array; 3196 PetscInt j; 3197 3198 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3199 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3200 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3201 } 3202 } else { /* expand the coarse solution */ 3203 if (applytranspose) { 3204 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3205 } else { 3206 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3207 } 3208 } 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3214 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3215 { 3216 PetscErrorCode ierr; 3217 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3218 PetscScalar *array; 3219 Vec from,to; 3220 3221 PetscFunctionBegin; 3222 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3223 from = pcbddc->coarse_vec; 3224 to = pcbddc->vec1_P; 3225 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3226 Vec tvec; 3227 3228 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3229 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3230 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3231 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3232 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3234 } 3235 } else { /* from local to global -> put data in coarse right hand side */ 3236 from = pcbddc->vec1_P; 3237 to = pcbddc->coarse_vec; 3238 } 3239 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3245 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3246 { 3247 PetscErrorCode ierr; 3248 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3249 PetscScalar *array; 3250 Vec from,to; 3251 3252 PetscFunctionBegin; 3253 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3254 from = pcbddc->coarse_vec; 3255 to = pcbddc->vec1_P; 3256 } else { /* from local to global -> put data in coarse right hand side */ 3257 from = pcbddc->vec1_P; 3258 to = pcbddc->coarse_vec; 3259 } 3260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3261 if (smode == SCATTER_FORWARD) { 3262 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3263 Vec tvec; 3264 3265 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3266 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3267 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3268 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3269 } 3270 } else { 3271 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3272 ierr = VecResetArray(from);CHKERRQ(ierr); 3273 } 3274 } 3275 PetscFunctionReturn(0); 3276 } 3277 3278 /* uncomment for testing purposes */ 3279 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3282 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3283 { 3284 PetscErrorCode ierr; 3285 PC_IS* pcis = (PC_IS*)(pc->data); 3286 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3287 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3288 /* one and zero */ 3289 PetscScalar one=1.0,zero=0.0; 3290 /* space to store constraints and their local indices */ 3291 PetscScalar *constraints_data; 3292 PetscInt *constraints_idxs,*constraints_idxs_B; 3293 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3294 PetscInt *constraints_n; 3295 /* iterators */ 3296 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3297 /* BLAS integers */ 3298 PetscBLASInt lwork,lierr; 3299 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3300 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3301 /* reuse */ 3302 PetscInt olocal_primal_size,olocal_primal_size_cc; 3303 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3304 /* change of basis */ 3305 PetscBool qr_needed; 3306 PetscBT change_basis,qr_needed_idx; 3307 /* auxiliary stuff */ 3308 PetscInt *nnz,*is_indices; 3309 PetscInt ncc; 3310 /* some quantities */ 3311 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3312 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3313 3314 PetscFunctionBegin; 3315 /* Destroy Mat objects computed previously */ 3316 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3317 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3318 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3319 /* save info on constraints from previous setup (if any) */ 3320 olocal_primal_size = pcbddc->local_primal_size; 3321 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3322 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3323 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3324 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3325 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3326 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3327 3328 /* print some info */ 3329 if (pcbddc->dbg_flag) { 3330 IS vertices; 3331 PetscInt nv,nedges,nfaces; 3332 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3333 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3334 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3335 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3340 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3341 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3342 } 3343 3344 if (!pcbddc->adaptive_selection) { 3345 IS ISForVertices,*ISForFaces,*ISForEdges; 3346 MatNullSpace nearnullsp; 3347 const Vec *nearnullvecs; 3348 Vec *localnearnullsp; 3349 PetscScalar *array; 3350 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3351 PetscBool nnsp_has_cnst; 3352 /* LAPACK working arrays for SVD or POD */ 3353 PetscBool skip_lapack,boolforchange; 3354 PetscScalar *work; 3355 PetscReal *singular_vals; 3356 #if defined(PETSC_USE_COMPLEX) 3357 PetscReal *rwork; 3358 #endif 3359 #if defined(PETSC_MISSING_LAPACK_GESVD) 3360 PetscScalar *temp_basis,*correlation_mat; 3361 #else 3362 PetscBLASInt dummy_int=1; 3363 PetscScalar dummy_scalar=1.; 3364 #endif 3365 3366 /* Get index sets for faces, edges and vertices from graph */ 3367 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3368 /* free unneeded index sets */ 3369 if (!pcbddc->use_vertices) { 3370 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3371 } 3372 if (!pcbddc->use_edges) { 3373 for (i=0;i<n_ISForEdges;i++) { 3374 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3375 } 3376 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3377 n_ISForEdges = 0; 3378 } 3379 if (!pcbddc->use_faces) { 3380 for (i=0;i<n_ISForFaces;i++) { 3381 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3382 } 3383 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3384 n_ISForFaces = 0; 3385 } 3386 3387 #if defined(PETSC_USE_DEBUG) 3388 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3389 Also use_change_of_basis should be consistent among processors */ 3390 if (pcbddc->NullSpace) { 3391 PetscBool tbool[2],gbool[2]; 3392 3393 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3394 pcbddc->use_change_of_basis = PETSC_TRUE; 3395 if (!ISForEdges) { 3396 pcbddc->use_change_on_faces = PETSC_TRUE; 3397 } 3398 } 3399 tbool[0] = pcbddc->use_change_of_basis; 3400 tbool[1] = pcbddc->use_change_on_faces; 3401 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3402 pcbddc->use_change_of_basis = gbool[0]; 3403 pcbddc->use_change_on_faces = gbool[1]; 3404 } 3405 #endif 3406 3407 /* check if near null space is attached to global mat */ 3408 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3409 if (nearnullsp) { 3410 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3411 /* remove any stored info */ 3412 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3413 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3414 /* store information for BDDC solver reuse */ 3415 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3416 pcbddc->onearnullspace = nearnullsp; 3417 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3418 for (i=0;i<nnsp_size;i++) { 3419 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3420 } 3421 } else { /* if near null space is not provided BDDC uses constants by default */ 3422 nnsp_size = 0; 3423 nnsp_has_cnst = PETSC_TRUE; 3424 } 3425 /* get max number of constraints on a single cc */ 3426 max_constraints = nnsp_size; 3427 if (nnsp_has_cnst) max_constraints++; 3428 3429 /* 3430 Evaluate maximum storage size needed by the procedure 3431 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3432 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3433 There can be multiple constraints per connected component 3434 */ 3435 n_vertices = 0; 3436 if (ISForVertices) { 3437 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3438 } 3439 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3440 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3441 3442 total_counts = n_ISForFaces+n_ISForEdges; 3443 total_counts *= max_constraints; 3444 total_counts += n_vertices; 3445 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3446 3447 total_counts = 0; 3448 max_size_of_constraint = 0; 3449 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3450 IS used_is; 3451 if (i<n_ISForEdges) { 3452 used_is = ISForEdges[i]; 3453 } else { 3454 used_is = ISForFaces[i-n_ISForEdges]; 3455 } 3456 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3457 total_counts += j; 3458 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3459 } 3460 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3461 3462 /* get local part of global near null space vectors */ 3463 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3464 for (k=0;k<nnsp_size;k++) { 3465 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3466 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3468 } 3469 3470 /* whether or not to skip lapack calls */ 3471 skip_lapack = PETSC_TRUE; 3472 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3473 3474 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3475 if (!skip_lapack) { 3476 PetscScalar temp_work; 3477 3478 #if defined(PETSC_MISSING_LAPACK_GESVD) 3479 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3480 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3482 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3483 #if defined(PETSC_USE_COMPLEX) 3484 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3485 #endif 3486 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3487 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3488 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3489 lwork = -1; 3490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3491 #if !defined(PETSC_USE_COMPLEX) 3492 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3493 #else 3494 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3495 #endif 3496 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3498 #else /* on missing GESVD */ 3499 /* SVD */ 3500 PetscInt max_n,min_n; 3501 max_n = max_size_of_constraint; 3502 min_n = max_constraints; 3503 if (max_size_of_constraint < max_constraints) { 3504 min_n = max_size_of_constraint; 3505 max_n = max_constraints; 3506 } 3507 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3508 #if defined(PETSC_USE_COMPLEX) 3509 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3510 #endif 3511 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3512 lwork = -1; 3513 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3517 #if !defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3521 #endif 3522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3523 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3524 #endif /* on missing GESVD */ 3525 /* Allocate optimal workspace */ 3526 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3527 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3528 } 3529 /* Now we can loop on constraining sets */ 3530 total_counts = 0; 3531 constraints_idxs_ptr[0] = 0; 3532 constraints_data_ptr[0] = 0; 3533 /* vertices */ 3534 if (n_vertices) { 3535 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3536 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3537 for (i=0;i<n_vertices;i++) { 3538 constraints_n[total_counts] = 1; 3539 constraints_data[total_counts] = 1.0; 3540 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3541 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3542 total_counts++; 3543 } 3544 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3545 n_vertices = total_counts; 3546 } 3547 3548 /* edges and faces */ 3549 total_counts_cc = total_counts; 3550 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3551 IS used_is; 3552 PetscBool idxs_copied = PETSC_FALSE; 3553 3554 if (ncc<n_ISForEdges) { 3555 used_is = ISForEdges[ncc]; 3556 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3557 } else { 3558 used_is = ISForFaces[ncc-n_ISForEdges]; 3559 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3560 } 3561 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3562 3563 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3564 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3565 /* change of basis should not be performed on local periodic nodes */ 3566 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3567 if (nnsp_has_cnst) { 3568 PetscScalar quad_value; 3569 3570 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3571 idxs_copied = PETSC_TRUE; 3572 3573 if (!pcbddc->use_nnsp_true) { 3574 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3575 } else { 3576 quad_value = 1.0; 3577 } 3578 for (j=0;j<size_of_constraint;j++) { 3579 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3580 } 3581 temp_constraints++; 3582 total_counts++; 3583 } 3584 for (k=0;k<nnsp_size;k++) { 3585 PetscReal real_value; 3586 PetscScalar *ptr_to_data; 3587 3588 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3589 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3590 for (j=0;j<size_of_constraint;j++) { 3591 ptr_to_data[j] = array[is_indices[j]]; 3592 } 3593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3594 /* check if array is null on the connected component */ 3595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3596 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3597 if (real_value > 0.0) { /* keep indices and values */ 3598 temp_constraints++; 3599 total_counts++; 3600 if (!idxs_copied) { 3601 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3602 idxs_copied = PETSC_TRUE; 3603 } 3604 } 3605 } 3606 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3607 valid_constraints = temp_constraints; 3608 if (!pcbddc->use_nnsp_true && temp_constraints) { 3609 if (temp_constraints == 1) { /* just normalize the constraint */ 3610 PetscScalar norm,*ptr_to_data; 3611 3612 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3614 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3615 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3616 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3617 } else { /* perform SVD */ 3618 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3619 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3620 3621 #if defined(PETSC_MISSING_LAPACK_GESVD) 3622 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3623 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3624 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3625 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3626 from that computed using LAPACKgesvd 3627 -> This is due to a different computation of eigenvectors in LAPACKheev 3628 -> The quality of the POD-computed basis will be the same */ 3629 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3630 /* Store upper triangular part of correlation matrix */ 3631 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3632 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3633 for (j=0;j<temp_constraints;j++) { 3634 for (k=0;k<j+1;k++) { 3635 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3636 } 3637 } 3638 /* compute eigenvalues and eigenvectors of correlation matrix */ 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3641 #if !defined(PETSC_USE_COMPLEX) 3642 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3643 #else 3644 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3645 #endif 3646 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3647 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3648 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3649 j = 0; 3650 while (j < temp_constraints && singular_vals[j] < tol) j++; 3651 total_counts = total_counts-j; 3652 valid_constraints = temp_constraints-j; 3653 /* scale and copy POD basis into used quadrature memory */ 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3655 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3656 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3660 if (j<temp_constraints) { 3661 PetscInt ii; 3662 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3663 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3664 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3665 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3666 for (k=0;k<temp_constraints-j;k++) { 3667 for (ii=0;ii<size_of_constraint;ii++) { 3668 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3669 } 3670 } 3671 } 3672 #else /* on missing GESVD */ 3673 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3674 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3675 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3677 #if !defined(PETSC_USE_COMPLEX) 3678 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3679 #else 3680 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3681 #endif 3682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3683 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3684 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3685 k = temp_constraints; 3686 if (k > size_of_constraint) k = size_of_constraint; 3687 j = 0; 3688 while (j < k && singular_vals[k-j-1] < tol) j++; 3689 valid_constraints = k-j; 3690 total_counts = total_counts-temp_constraints+valid_constraints; 3691 #endif /* on missing GESVD */ 3692 } 3693 } 3694 /* update pointers information */ 3695 if (valid_constraints) { 3696 constraints_n[total_counts_cc] = valid_constraints; 3697 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3698 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3699 /* set change_of_basis flag */ 3700 if (boolforchange) { 3701 PetscBTSet(change_basis,total_counts_cc); 3702 } 3703 total_counts_cc++; 3704 } 3705 } 3706 /* free workspace */ 3707 if (!skip_lapack) { 3708 ierr = PetscFree(work);CHKERRQ(ierr); 3709 #if defined(PETSC_USE_COMPLEX) 3710 ierr = PetscFree(rwork);CHKERRQ(ierr); 3711 #endif 3712 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3713 #if defined(PETSC_MISSING_LAPACK_GESVD) 3714 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3715 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3716 #endif 3717 } 3718 for (k=0;k<nnsp_size;k++) { 3719 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3720 } 3721 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3722 /* free index sets of faces, edges and vertices */ 3723 for (i=0;i<n_ISForFaces;i++) { 3724 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3725 } 3726 if (n_ISForFaces) { 3727 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3728 } 3729 for (i=0;i<n_ISForEdges;i++) { 3730 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3731 } 3732 if (n_ISForEdges) { 3733 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3734 } 3735 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3736 } else { 3737 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3738 3739 total_counts = 0; 3740 n_vertices = 0; 3741 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3742 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3743 } 3744 max_constraints = 0; 3745 total_counts_cc = 0; 3746 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3747 total_counts += pcbddc->adaptive_constraints_n[i]; 3748 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3749 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3750 } 3751 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3752 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3753 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3754 constraints_data = pcbddc->adaptive_constraints_data; 3755 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3756 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3757 total_counts_cc = 0; 3758 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3759 if (pcbddc->adaptive_constraints_n[i]) { 3760 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3761 } 3762 } 3763 #if 0 3764 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3765 for (i=0;i<total_counts_cc;i++) { 3766 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3767 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3768 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3769 printf(" %d",constraints_idxs[j]); 3770 } 3771 printf("\n"); 3772 printf("number of cc: %d\n",constraints_n[i]); 3773 } 3774 for (i=0;i<n_vertices;i++) { 3775 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3776 } 3777 for (i=0;i<sub_schurs->n_subs;i++) { 3778 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3779 } 3780 #endif 3781 3782 max_size_of_constraint = 0; 3783 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3784 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3785 /* Change of basis */ 3786 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3787 if (pcbddc->use_change_of_basis) { 3788 for (i=0;i<sub_schurs->n_subs;i++) { 3789 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3790 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3791 } 3792 } 3793 } 3794 } 3795 pcbddc->local_primal_size = total_counts; 3796 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3797 3798 /* map constraints_idxs in boundary numbering */ 3799 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3800 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3801 3802 /* Create constraint matrix */ 3803 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3804 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3805 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3806 3807 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3808 /* determine if a QR strategy is needed for change of basis */ 3809 qr_needed = PETSC_FALSE; 3810 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3811 total_primal_vertices=0; 3812 pcbddc->local_primal_size_cc = 0; 3813 for (i=0;i<total_counts_cc;i++) { 3814 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3815 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3816 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3817 pcbddc->local_primal_size_cc += 1; 3818 } else if (PetscBTLookup(change_basis,i)) { 3819 for (k=0;k<constraints_n[i];k++) { 3820 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3821 } 3822 pcbddc->local_primal_size_cc += constraints_n[i]; 3823 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3824 PetscBTSet(qr_needed_idx,i); 3825 qr_needed = PETSC_TRUE; 3826 } 3827 } else { 3828 pcbddc->local_primal_size_cc += 1; 3829 } 3830 } 3831 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3832 pcbddc->n_vertices = total_primal_vertices; 3833 /* permute indices in order to have a sorted set of vertices */ 3834 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3835 3836 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3837 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3838 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3839 3840 /* nonzero structure of constraint matrix */ 3841 /* and get reference dof for local constraints */ 3842 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3843 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3844 3845 j = total_primal_vertices; 3846 total_counts = total_primal_vertices; 3847 cum = total_primal_vertices; 3848 for (i=n_vertices;i<total_counts_cc;i++) { 3849 if (!PetscBTLookup(change_basis,i)) { 3850 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3851 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3852 cum++; 3853 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3854 for (k=0;k<constraints_n[i];k++) { 3855 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3856 nnz[j+k] = size_of_constraint; 3857 } 3858 j += constraints_n[i]; 3859 } 3860 } 3861 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3862 ierr = PetscFree(nnz);CHKERRQ(ierr); 3863 3864 /* set values in constraint matrix */ 3865 for (i=0;i<total_primal_vertices;i++) { 3866 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3867 } 3868 total_counts = total_primal_vertices; 3869 for (i=n_vertices;i<total_counts_cc;i++) { 3870 if (!PetscBTLookup(change_basis,i)) { 3871 PetscInt *cols; 3872 3873 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3874 cols = constraints_idxs+constraints_idxs_ptr[i]; 3875 for (k=0;k<constraints_n[i];k++) { 3876 PetscInt row = total_counts+k; 3877 PetscScalar *vals; 3878 3879 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3880 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3881 } 3882 total_counts += constraints_n[i]; 3883 } 3884 } 3885 /* assembling */ 3886 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 3889 /* 3890 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3891 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3892 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3893 */ 3894 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3895 if (pcbddc->use_change_of_basis) { 3896 /* dual and primal dofs on a single cc */ 3897 PetscInt dual_dofs,primal_dofs; 3898 /* working stuff for GEQRF */ 3899 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3900 PetscBLASInt lqr_work; 3901 /* working stuff for UNGQR */ 3902 PetscScalar *gqr_work,lgqr_work_t; 3903 PetscBLASInt lgqr_work; 3904 /* working stuff for TRTRS */ 3905 PetscScalar *trs_rhs; 3906 PetscBLASInt Blas_NRHS; 3907 /* pointers for values insertion into change of basis matrix */ 3908 PetscInt *start_rows,*start_cols; 3909 PetscScalar *start_vals; 3910 /* working stuff for values insertion */ 3911 PetscBT is_primal; 3912 PetscInt *aux_primal_numbering_B; 3913 /* matrix sizes */ 3914 PetscInt global_size,local_size; 3915 /* temporary change of basis */ 3916 Mat localChangeOfBasisMatrix; 3917 /* extra space for debugging */ 3918 PetscScalar *dbg_work; 3919 3920 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3921 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3922 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3923 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3924 /* nonzeros for local mat */ 3925 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3926 if (!pcbddc->benign_change || pcbddc->fake_change) { 3927 for (i=0;i<pcis->n;i++) nnz[i]=1; 3928 } else { 3929 const PetscInt *ii; 3930 PetscInt n; 3931 PetscBool flg_row; 3932 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3933 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3934 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3935 } 3936 for (i=n_vertices;i<total_counts_cc;i++) { 3937 if (PetscBTLookup(change_basis,i)) { 3938 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3939 if (PetscBTLookup(qr_needed_idx,i)) { 3940 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3941 } else { 3942 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3943 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3944 } 3945 } 3946 } 3947 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3948 ierr = PetscFree(nnz);CHKERRQ(ierr); 3949 /* Set interior change in the matrix */ 3950 if (!pcbddc->benign_change || pcbddc->fake_change) { 3951 for (i=0;i<pcis->n;i++) { 3952 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3953 } 3954 } else { 3955 const PetscInt *ii,*jj; 3956 PetscScalar *aa; 3957 PetscInt n; 3958 PetscBool flg_row; 3959 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3960 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3961 for (i=0;i<n;i++) { 3962 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3963 } 3964 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3965 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3966 } 3967 3968 if (pcbddc->dbg_flag) { 3969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3971 } 3972 3973 3974 /* Now we loop on the constraints which need a change of basis */ 3975 /* 3976 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3977 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3978 3979 Basic blocks of change of basis matrix T computed by 3980 3981 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3982 3983 | 1 0 ... 0 s_1/S | 3984 | 0 1 ... 0 s_2/S | 3985 | ... | 3986 | 0 ... 1 s_{n-1}/S | 3987 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3988 3989 with S = \sum_{i=1}^n s_i^2 3990 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3991 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3992 3993 - QR decomposition of constraints otherwise 3994 */ 3995 if (qr_needed) { 3996 /* space to store Q */ 3997 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3998 /* first we issue queries for optimal work */ 3999 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4000 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4001 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4002 lqr_work = -1; 4003 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4004 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4005 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4006 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4007 lgqr_work = -1; 4008 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4009 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4011 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4012 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4013 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4014 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4015 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4016 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4017 /* array to store scaling factors for reflectors */ 4018 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4019 /* array to store rhs and solution of triangular solver */ 4020 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4021 /* allocating workspace for check */ 4022 if (pcbddc->dbg_flag) { 4023 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4024 } 4025 } 4026 /* array to store whether a node is primal or not */ 4027 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4028 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4029 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4030 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4031 for (i=0;i<total_primal_vertices;i++) { 4032 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4033 } 4034 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4035 4036 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4037 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4038 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4039 if (PetscBTLookup(change_basis,total_counts)) { 4040 /* get constraint info */ 4041 primal_dofs = constraints_n[total_counts]; 4042 dual_dofs = size_of_constraint-primal_dofs; 4043 4044 if (pcbddc->dbg_flag) { 4045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4046 } 4047 4048 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4049 4050 /* copy quadrature constraints for change of basis check */ 4051 if (pcbddc->dbg_flag) { 4052 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4053 } 4054 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4055 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4056 4057 /* compute QR decomposition of constraints */ 4058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4062 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4063 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4064 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4065 4066 /* explictly compute R^-T */ 4067 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4068 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4069 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4072 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4074 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4075 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4077 4078 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4084 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4085 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4086 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4087 4088 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4089 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4090 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4092 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4095 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4097 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4098 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4099 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4100 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4101 4102 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4103 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4104 /* insert cols for primal dofs */ 4105 for (j=0;j<primal_dofs;j++) { 4106 start_vals = &qr_basis[j*size_of_constraint]; 4107 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4108 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4109 } 4110 /* insert cols for dual dofs */ 4111 for (j=0,k=0;j<dual_dofs;k++) { 4112 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4113 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4114 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4115 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4116 j++; 4117 } 4118 } 4119 4120 /* check change of basis */ 4121 if (pcbddc->dbg_flag) { 4122 PetscInt ii,jj; 4123 PetscBool valid_qr=PETSC_TRUE; 4124 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4129 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4130 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4131 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4132 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4133 for (jj=0;jj<size_of_constraint;jj++) { 4134 for (ii=0;ii<primal_dofs;ii++) { 4135 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4136 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4137 } 4138 } 4139 if (!valid_qr) { 4140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4141 for (jj=0;jj<size_of_constraint;jj++) { 4142 for (ii=0;ii<primal_dofs;ii++) { 4143 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4144 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4145 } 4146 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4147 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4148 } 4149 } 4150 } 4151 } else { 4152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4153 } 4154 } 4155 } else { /* simple transformation block */ 4156 PetscInt row,col; 4157 PetscScalar val,norm; 4158 4159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4160 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4161 for (j=0;j<size_of_constraint;j++) { 4162 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4163 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4164 if (!PetscBTLookup(is_primal,row_B)) { 4165 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4166 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4167 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4168 } else { 4169 for (k=0;k<size_of_constraint;k++) { 4170 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4171 if (row != col) { 4172 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4173 } else { 4174 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4175 } 4176 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4177 } 4178 } 4179 } 4180 if (pcbddc->dbg_flag) { 4181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4182 } 4183 } 4184 } else { 4185 if (pcbddc->dbg_flag) { 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4187 } 4188 } 4189 } 4190 4191 /* free workspace */ 4192 if (qr_needed) { 4193 if (pcbddc->dbg_flag) { 4194 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4195 } 4196 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4197 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4198 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4199 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4200 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4201 } 4202 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4203 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4205 4206 /* assembling of global change of variable */ 4207 if (!pcbddc->fake_change) { 4208 Mat tmat; 4209 PetscInt bs; 4210 4211 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4212 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4213 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4214 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4215 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4216 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4217 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4218 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4219 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4220 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4221 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4222 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4223 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4224 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4225 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4226 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4227 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4228 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4229 4230 /* check */ 4231 if (pcbddc->dbg_flag) { 4232 PetscReal error; 4233 Vec x,x_change; 4234 4235 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4236 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4237 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4238 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4239 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4241 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4242 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4243 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4244 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4245 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4246 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4247 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4249 ierr = VecDestroy(&x);CHKERRQ(ierr); 4250 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4251 } 4252 /* adapt sub_schurs computed (if any) */ 4253 if (pcbddc->use_deluxe_scaling) { 4254 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4255 4256 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4257 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4258 } 4259 if (sub_schurs->S_Ej_all) { 4260 Mat S_new,tmat; 4261 IS is_all_N,is_V_Sall = NULL; 4262 4263 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4264 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4265 if (pcbddc->deluxe_zerorows) { 4266 ISLocalToGlobalMapping NtoSall; 4267 IS is_V; 4268 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4269 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4270 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4271 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4272 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4273 } 4274 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4275 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4276 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4277 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4278 if (pcbddc->deluxe_zerorows) { 4279 const PetscScalar *array; 4280 const PetscInt *idxs_V,*idxs_all; 4281 PetscInt i,n_V; 4282 4283 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4284 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4285 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4286 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4287 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4288 for (i=0;i<n_V;i++) { 4289 PetscScalar val; 4290 PetscInt idx; 4291 4292 idx = idxs_V[i]; 4293 val = array[idxs_all[idxs_V[i]]]; 4294 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4295 } 4296 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4297 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4298 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4299 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4300 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4301 } 4302 sub_schurs->S_Ej_all = S_new; 4303 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4304 if (sub_schurs->sum_S_Ej_all) { 4305 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4306 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4307 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4308 if (pcbddc->deluxe_zerorows) { 4309 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4310 } 4311 sub_schurs->sum_S_Ej_all = S_new; 4312 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4313 } 4314 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4315 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4316 } 4317 /* destroy any change of basis context in sub_schurs */ 4318 if (sub_schurs->change) { 4319 PetscInt i; 4320 4321 for (i=0;i<sub_schurs->n_subs;i++) { 4322 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4323 } 4324 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4325 } 4326 } 4327 if (pcbddc->switch_static) { /* need to save the local change */ 4328 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4329 } else { 4330 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4331 } 4332 /* determine if any process has changed the pressures locally */ 4333 if (pcbddc->benign_saddle_point) { 4334 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4335 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4336 } 4337 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4338 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4339 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4340 pcbddc->use_qr_single = qr_needed; 4341 } 4342 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4343 PetscBool needglobal = PETSC_FALSE; 4344 if (pcbddc->benign_saddle_point) { 4345 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4346 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4347 } 4348 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4349 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4350 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4351 } else { 4352 Mat benign_global = NULL; 4353 if (needglobal) { 4354 Mat tmat; 4355 4356 pcbddc->change_interior = PETSC_TRUE; 4357 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4358 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4359 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4360 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4361 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4362 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4363 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4364 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4365 if (pcbddc->benign_change) { 4366 Mat M; 4367 4368 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4369 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4370 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4371 ierr = MatDestroy(&M);CHKERRQ(ierr); 4372 } else { 4373 Mat eye; 4374 PetscScalar *array; 4375 4376 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4377 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4378 for (i=0;i<pcis->n;i++) { 4379 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4380 } 4381 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4382 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4385 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4386 } 4387 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4388 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4389 } 4390 if (pcbddc->user_ChangeOfBasisMatrix) { 4391 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4392 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4393 } else if (needglobal) { 4394 pcbddc->ChangeOfBasisMatrix = benign_global; 4395 } 4396 } 4397 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4398 IS is_global; 4399 const PetscInt *gidxs; 4400 4401 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4402 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4403 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4404 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4405 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4406 } 4407 } 4408 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4409 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4410 } 4411 4412 if (!pcbddc->fake_change) { 4413 /* add pressure dofs to set of primal nodes for numbering purposes */ 4414 for (i=0;i<pcbddc->benign_n;i++) { 4415 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4416 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4417 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4418 pcbddc->local_primal_size_cc++; 4419 pcbddc->local_primal_size++; 4420 } 4421 4422 /* check if a new primal space has been introduced (also take into account benign trick) */ 4423 pcbddc->new_primal_space_local = PETSC_TRUE; 4424 if (olocal_primal_size == pcbddc->local_primal_size) { 4425 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4426 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4427 if (!pcbddc->new_primal_space_local) { 4428 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4429 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4430 } 4431 } 4432 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4433 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4434 } 4435 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4436 4437 /* flush dbg viewer */ 4438 if (pcbddc->dbg_flag) { 4439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4440 } 4441 4442 /* free workspace */ 4443 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4444 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4445 if (!pcbddc->adaptive_selection) { 4446 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4447 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4448 } else { 4449 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4450 pcbddc->adaptive_constraints_idxs_ptr, 4451 pcbddc->adaptive_constraints_data_ptr, 4452 pcbddc->adaptive_constraints_idxs, 4453 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4454 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4455 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4456 } 4457 PetscFunctionReturn(0); 4458 } 4459 4460 #undef __FUNCT__ 4461 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4462 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4463 { 4464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4465 PC_IS *pcis = (PC_IS*)pc->data; 4466 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4467 PetscInt ierr,i,N; 4468 PetscViewer viewer=pcbddc->dbg_viewer; 4469 4470 PetscFunctionBegin; 4471 /* Reset previously computed graph */ 4472 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4473 /* Init local Graph struct */ 4474 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4475 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4476 4477 /* Check validity of the csr graph passed in by the user */ 4478 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4479 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4480 } 4481 4482 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4483 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4484 PetscInt *xadj,*adjncy; 4485 PetscInt nvtxs; 4486 PetscBool flg_row=PETSC_FALSE; 4487 4488 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4489 if (flg_row) { 4490 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4491 pcbddc->computed_rowadj = PETSC_TRUE; 4492 } 4493 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4494 } 4495 if (pcbddc->dbg_flag) { 4496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4497 } 4498 4499 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4500 if (pcbddc->user_provided_isfordofs) { 4501 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4502 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4503 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4504 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4505 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4506 } 4507 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4508 pcbddc->n_ISForDofs = 0; 4509 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4510 } 4511 } else { 4512 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4513 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4515 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4516 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4517 } 4518 } 4519 } 4520 4521 /* Setup of Graph */ 4522 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4523 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4524 } 4525 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4526 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4527 } 4528 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4529 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4530 } 4531 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4532 4533 /* attach info on disconnected subdomains if present */ 4534 if (pcbddc->n_local_subs) { 4535 PetscInt *local_subs; 4536 4537 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4538 for (i=0;i<pcbddc->n_local_subs;i++) { 4539 const PetscInt *idxs; 4540 PetscInt nl,j; 4541 4542 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4543 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4544 for (j=0;j<nl;j++) { 4545 local_subs[idxs[j]] = i; 4546 } 4547 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4548 } 4549 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4550 pcbddc->mat_graph->local_subs = local_subs; 4551 } 4552 4553 /* Graph's connected components analysis */ 4554 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4555 4556 /* print some info to stdout */ 4557 if (pcbddc->dbg_flag) { 4558 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4559 } 4560 PetscFunctionReturn(0); 4561 } 4562 4563 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "PCBDDCSubsetNumbering" 4566 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4567 { 4568 PetscSF sf; 4569 PetscLayout map; 4570 const PetscInt *idxs; 4571 PetscInt *leaf_data,*root_data,*gidxs; 4572 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4573 PetscInt n_n,nlocals,start,first_index; 4574 PetscMPIInt commsize; 4575 PetscBool first_found; 4576 PetscErrorCode ierr; 4577 4578 PetscFunctionBegin; 4579 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4580 if (subset_mult) { 4581 PetscCheckSameComm(subset,1,subset_mult,2); 4582 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4583 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4584 } 4585 /* create workspace layout for computing global indices of subset */ 4586 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4587 lbounds[0] = lbounds[1] = 0; 4588 for (i=0;i<n;i++) { 4589 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4590 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4591 } 4592 lbounds[0] = -lbounds[0]; 4593 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4594 gbounds[0] = -gbounds[0]; 4595 N = gbounds[1] - gbounds[0] + 1; 4596 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4599 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4600 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4601 4602 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4603 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4604 if (subset_mult) { 4605 const PetscInt* idxs_mult; 4606 4607 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4608 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4609 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4610 } else { 4611 for (i=0;i<n;i++) leaf_data[i] = 1; 4612 } 4613 /* local size of new subset */ 4614 n_n = 0; 4615 for (i=0;i<n;i++) n_n += leaf_data[i]; 4616 4617 /* global indexes in layout */ 4618 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4619 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4620 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4621 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4622 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4623 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4624 4625 /* reduce from leaves to roots */ 4626 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4627 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4628 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4629 4630 /* count indexes in local part of layout */ 4631 nlocals = 0; 4632 first_index = -1; 4633 first_found = PETSC_FALSE; 4634 for (i=0;i<Nl;i++) { 4635 if (!first_found && root_data[i]) { 4636 first_found = PETSC_TRUE; 4637 first_index = i; 4638 } 4639 nlocals += root_data[i]; 4640 } 4641 4642 /* cumulative of number of indexes and size of subset without holes */ 4643 #if defined(PETSC_HAVE_MPI_EXSCAN) 4644 start = 0; 4645 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4646 #else 4647 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4648 start = start-nlocals; 4649 #endif 4650 4651 if (N_n) { /* compute total size of new subset if requested */ 4652 *N_n = start + nlocals; 4653 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4654 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4655 } 4656 4657 /* adapt root data with cumulative */ 4658 if (first_found) { 4659 PetscInt old_index; 4660 4661 root_data[first_index] += start; 4662 old_index = first_index; 4663 for (i=first_index+1;i<Nl;i++) { 4664 if (root_data[i]) { 4665 root_data[i] += root_data[old_index]; 4666 old_index = i; 4667 } 4668 } 4669 } 4670 4671 /* from roots to leaves */ 4672 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4673 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4674 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4675 4676 /* create new IS with global indexes without holes */ 4677 if (subset_mult) { 4678 const PetscInt* idxs_mult; 4679 PetscInt cum; 4680 4681 cum = 0; 4682 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4683 for (i=0;i<n;i++) { 4684 PetscInt j; 4685 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4686 } 4687 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4688 } else { 4689 for (i=0;i<n;i++) { 4690 gidxs[i] = leaf_data[i]-1; 4691 } 4692 } 4693 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4694 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4695 PetscFunctionReturn(0); 4696 } 4697 4698 #undef __FUNCT__ 4699 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4700 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4701 { 4702 PetscInt i,j; 4703 PetscScalar *alphas; 4704 PetscErrorCode ierr; 4705 4706 PetscFunctionBegin; 4707 /* this implements stabilized Gram-Schmidt */ 4708 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4709 for (i=0;i<n;i++) { 4710 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4711 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4712 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4713 } 4714 ierr = PetscFree(alphas);CHKERRQ(ierr); 4715 PetscFunctionReturn(0); 4716 } 4717 4718 #undef __FUNCT__ 4719 #define __FUNCT__ "MatISGetSubassemblingPattern" 4720 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4721 { 4722 IS ranks_send_to; 4723 PetscInt n_neighs,*neighs,*n_shared,**shared; 4724 PetscMPIInt size,rank,color; 4725 PetscInt *xadj,*adjncy; 4726 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4727 PetscInt i,local_size,threshold=0; 4728 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4729 PetscSubcomm subcomm; 4730 PetscErrorCode ierr; 4731 4732 PetscFunctionBegin; 4733 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4734 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4735 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4736 4737 /* Get info on mapping */ 4738 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4739 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4740 4741 /* build local CSR graph of subdomains' connectivity */ 4742 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4743 xadj[0] = 0; 4744 xadj[1] = PetscMax(n_neighs-1,0); 4745 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4746 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4747 4748 if (threshold) { 4749 PetscInt xadj_count = 0; 4750 for (i=1;i<n_neighs;i++) { 4751 if (n_shared[i] > threshold) { 4752 adjncy[xadj_count] = neighs[i]; 4753 adjncy_wgt[xadj_count] = n_shared[i]; 4754 xadj_count++; 4755 } 4756 } 4757 xadj[1] = xadj_count; 4758 } else { 4759 if (xadj[1]) { 4760 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4761 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4762 } 4763 } 4764 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4765 if (use_square) { 4766 for (i=0;i<xadj[1];i++) { 4767 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4768 } 4769 } 4770 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4771 4772 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4773 4774 /* 4775 Restrict work on active processes only. 4776 */ 4777 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4778 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4779 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4780 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4781 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4782 if (color) { 4783 ierr = PetscFree(xadj);CHKERRQ(ierr); 4784 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4785 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4786 } else { 4787 Mat subdomain_adj; 4788 IS new_ranks,new_ranks_contig; 4789 MatPartitioning partitioner; 4790 PetscInt prank,rstart=0,rend=0; 4791 PetscInt *is_indices,*oldranks; 4792 PetscBool aggregate; 4793 4794 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4795 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4796 prank = rank; 4797 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4798 /* 4799 for (i=0;i<size;i++) { 4800 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4801 } 4802 */ 4803 for (i=0;i<xadj[1];i++) { 4804 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4805 } 4806 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4807 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4808 if (aggregate) { 4809 PetscInt lrows,row,ncols,*cols; 4810 PetscMPIInt nrank; 4811 PetscScalar *vals; 4812 4813 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4814 lrows = 0; 4815 if (nrank<redprocs) { 4816 lrows = size/redprocs; 4817 if (nrank<size%redprocs) lrows++; 4818 } 4819 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4820 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4821 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4822 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4823 row = nrank; 4824 ncols = xadj[1]-xadj[0]; 4825 cols = adjncy; 4826 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4827 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4828 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4829 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4830 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4831 ierr = PetscFree(xadj);CHKERRQ(ierr); 4832 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4833 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4834 ierr = PetscFree(vals);CHKERRQ(ierr); 4835 } else { 4836 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4837 } 4838 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4839 4840 /* Partition */ 4841 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4842 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4843 if (use_vwgt) { 4844 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4845 v_wgt[0] = local_size; 4846 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4847 } 4848 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4849 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4850 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4851 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4852 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4853 4854 /* renumber new_ranks to avoid "holes" in new set of processors */ 4855 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4856 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4857 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4858 if (!redprocs) { 4859 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4860 } else { 4861 PetscInt idxs[1]; 4862 PetscMPIInt tag; 4863 MPI_Request *reqs; 4864 4865 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4866 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4867 for (i=rstart;i<rend;i++) { 4868 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4869 } 4870 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4871 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4872 ierr = PetscFree(reqs);CHKERRQ(ierr); 4873 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4874 } 4875 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4876 /* clean up */ 4877 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4878 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4879 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4880 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4881 } 4882 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4883 4884 /* assemble parallel IS for sends */ 4885 i = 1; 4886 if (color) i=0; 4887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4888 /* get back IS */ 4889 *is_sends = ranks_send_to; 4890 PetscFunctionReturn(0); 4891 } 4892 4893 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4894 4895 #undef __FUNCT__ 4896 #define __FUNCT__ "MatISSubassemble" 4897 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4898 { 4899 Mat local_mat; 4900 IS is_sends_internal; 4901 PetscInt rows,cols,new_local_rows; 4902 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4903 PetscBool ismatis,isdense,newisdense,destroy_mat; 4904 ISLocalToGlobalMapping l2gmap; 4905 PetscInt* l2gmap_indices; 4906 const PetscInt* is_indices; 4907 MatType new_local_type; 4908 /* buffers */ 4909 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4910 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4911 PetscInt *recv_buffer_idxs_local; 4912 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4913 /* MPI */ 4914 MPI_Comm comm,comm_n; 4915 PetscSubcomm subcomm; 4916 PetscMPIInt n_sends,n_recvs,commsize; 4917 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4918 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4919 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4920 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4921 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4922 PetscErrorCode ierr; 4923 4924 PetscFunctionBegin; 4925 /* TODO: add missing checks */ 4926 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4927 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4928 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4929 PetscValidLogicalCollectiveInt(mat,nis,7); 4930 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4931 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4932 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4933 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4934 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4935 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4936 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4937 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4938 PetscInt mrows,mcols,mnrows,mncols; 4939 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4940 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4941 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4942 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4943 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4944 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4945 } 4946 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4947 PetscValidLogicalCollectiveInt(mat,bs,0); 4948 /* prepare IS for sending if not provided */ 4949 if (!is_sends) { 4950 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4951 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4952 } else { 4953 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4954 is_sends_internal = is_sends; 4955 } 4956 4957 /* get comm */ 4958 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4959 4960 /* compute number of sends */ 4961 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4962 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4963 4964 /* compute number of receives */ 4965 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4966 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4967 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4968 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4969 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4970 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4971 ierr = PetscFree(iflags);CHKERRQ(ierr); 4972 4973 /* restrict comm if requested */ 4974 subcomm = 0; 4975 destroy_mat = PETSC_FALSE; 4976 if (restrict_comm) { 4977 PetscMPIInt color,subcommsize; 4978 4979 color = 0; 4980 if (restrict_full) { 4981 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4982 } else { 4983 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4984 } 4985 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4986 subcommsize = commsize - subcommsize; 4987 /* check if reuse has been requested */ 4988 if (reuse == MAT_REUSE_MATRIX) { 4989 if (*mat_n) { 4990 PetscMPIInt subcommsize2; 4991 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4992 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4993 comm_n = PetscObjectComm((PetscObject)*mat_n); 4994 } else { 4995 comm_n = PETSC_COMM_SELF; 4996 } 4997 } else { /* MAT_INITIAL_MATRIX */ 4998 PetscMPIInt rank; 4999 5000 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5001 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5002 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5003 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5004 comm_n = PetscSubcommChild(subcomm); 5005 } 5006 /* flag to destroy *mat_n if not significative */ 5007 if (color) destroy_mat = PETSC_TRUE; 5008 } else { 5009 comm_n = comm; 5010 } 5011 5012 /* prepare send/receive buffers */ 5013 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5014 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5015 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5016 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5017 if (nis) { 5018 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5019 } 5020 5021 /* Get data from local matrices */ 5022 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5023 /* TODO: See below some guidelines on how to prepare the local buffers */ 5024 /* 5025 send_buffer_vals should contain the raw values of the local matrix 5026 send_buffer_idxs should contain: 5027 - MatType_PRIVATE type 5028 - PetscInt size_of_l2gmap 5029 - PetscInt global_row_indices[size_of_l2gmap] 5030 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5031 */ 5032 else { 5033 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5034 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5035 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5036 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5037 send_buffer_idxs[1] = i; 5038 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5039 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5040 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5041 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5042 for (i=0;i<n_sends;i++) { 5043 ilengths_vals[is_indices[i]] = len*len; 5044 ilengths_idxs[is_indices[i]] = len+2; 5045 } 5046 } 5047 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5048 /* additional is (if any) */ 5049 if (nis) { 5050 PetscMPIInt psum; 5051 PetscInt j; 5052 for (j=0,psum=0;j<nis;j++) { 5053 PetscInt plen; 5054 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5055 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5056 psum += len+1; /* indices + lenght */ 5057 } 5058 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5059 for (j=0,psum=0;j<nis;j++) { 5060 PetscInt plen; 5061 const PetscInt *is_array_idxs; 5062 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5063 send_buffer_idxs_is[psum] = plen; 5064 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5065 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5066 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5067 psum += plen+1; /* indices + lenght */ 5068 } 5069 for (i=0;i<n_sends;i++) { 5070 ilengths_idxs_is[is_indices[i]] = psum; 5071 } 5072 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5073 } 5074 5075 buf_size_idxs = 0; 5076 buf_size_vals = 0; 5077 buf_size_idxs_is = 0; 5078 for (i=0;i<n_recvs;i++) { 5079 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5080 buf_size_vals += (PetscInt)olengths_vals[i]; 5081 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5082 } 5083 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5084 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5085 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5086 5087 /* get new tags for clean communications */ 5088 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5089 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5090 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5091 5092 /* allocate for requests */ 5093 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5094 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5095 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5096 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5097 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5098 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5099 5100 /* communications */ 5101 ptr_idxs = recv_buffer_idxs; 5102 ptr_vals = recv_buffer_vals; 5103 ptr_idxs_is = recv_buffer_idxs_is; 5104 for (i=0;i<n_recvs;i++) { 5105 source_dest = onodes[i]; 5106 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5107 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5108 ptr_idxs += olengths_idxs[i]; 5109 ptr_vals += olengths_vals[i]; 5110 if (nis) { 5111 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5112 ptr_idxs_is += olengths_idxs_is[i]; 5113 } 5114 } 5115 for (i=0;i<n_sends;i++) { 5116 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5117 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5118 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5119 if (nis) { 5120 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5121 } 5122 } 5123 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5124 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5125 5126 /* assemble new l2g map */ 5127 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5128 ptr_idxs = recv_buffer_idxs; 5129 new_local_rows = 0; 5130 for (i=0;i<n_recvs;i++) { 5131 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5132 ptr_idxs += olengths_idxs[i]; 5133 } 5134 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5135 ptr_idxs = recv_buffer_idxs; 5136 new_local_rows = 0; 5137 for (i=0;i<n_recvs;i++) { 5138 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5139 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5140 ptr_idxs += olengths_idxs[i]; 5141 } 5142 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5143 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5144 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5145 5146 /* infer new local matrix type from received local matrices type */ 5147 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5148 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5149 if (n_recvs) { 5150 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5151 ptr_idxs = recv_buffer_idxs; 5152 for (i=0;i<n_recvs;i++) { 5153 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5154 new_local_type_private = MATAIJ_PRIVATE; 5155 break; 5156 } 5157 ptr_idxs += olengths_idxs[i]; 5158 } 5159 switch (new_local_type_private) { 5160 case MATDENSE_PRIVATE: 5161 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5162 new_local_type = MATSEQAIJ; 5163 bs = 1; 5164 } else { /* if I receive only 1 dense matrix */ 5165 new_local_type = MATSEQDENSE; 5166 bs = 1; 5167 } 5168 break; 5169 case MATAIJ_PRIVATE: 5170 new_local_type = MATSEQAIJ; 5171 bs = 1; 5172 break; 5173 case MATBAIJ_PRIVATE: 5174 new_local_type = MATSEQBAIJ; 5175 break; 5176 case MATSBAIJ_PRIVATE: 5177 new_local_type = MATSEQSBAIJ; 5178 break; 5179 default: 5180 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5181 break; 5182 } 5183 } else { /* by default, new_local_type is seqdense */ 5184 new_local_type = MATSEQDENSE; 5185 bs = 1; 5186 } 5187 5188 /* create MATIS object if needed */ 5189 if (reuse == MAT_INITIAL_MATRIX) { 5190 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5191 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5192 } else { 5193 /* it also destroys the local matrices */ 5194 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5195 } 5196 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5197 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5198 5199 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5200 5201 /* Global to local map of received indices */ 5202 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5203 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5204 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5205 5206 /* restore attributes -> type of incoming data and its size */ 5207 buf_size_idxs = 0; 5208 for (i=0;i<n_recvs;i++) { 5209 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5210 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5211 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5212 } 5213 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5214 5215 /* set preallocation */ 5216 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5217 if (!newisdense) { 5218 PetscInt *new_local_nnz=0; 5219 5220 ptr_vals = recv_buffer_vals; 5221 ptr_idxs = recv_buffer_idxs_local; 5222 if (n_recvs) { 5223 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5224 } 5225 for (i=0;i<n_recvs;i++) { 5226 PetscInt j; 5227 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5228 for (j=0;j<*(ptr_idxs+1);j++) { 5229 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5230 } 5231 } else { 5232 /* TODO */ 5233 } 5234 ptr_idxs += olengths_idxs[i]; 5235 } 5236 if (new_local_nnz) { 5237 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5238 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5239 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5240 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5241 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5242 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5243 } else { 5244 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5245 } 5246 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5247 } else { 5248 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5249 } 5250 5251 /* set values */ 5252 ptr_vals = recv_buffer_vals; 5253 ptr_idxs = recv_buffer_idxs_local; 5254 for (i=0;i<n_recvs;i++) { 5255 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5256 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5257 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5258 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5259 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5260 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5261 } else { 5262 /* TODO */ 5263 } 5264 ptr_idxs += olengths_idxs[i]; 5265 ptr_vals += olengths_vals[i]; 5266 } 5267 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5268 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5269 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5270 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5271 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5272 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5273 5274 #if 0 5275 if (!restrict_comm) { /* check */ 5276 Vec lvec,rvec; 5277 PetscReal infty_error; 5278 5279 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5280 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5281 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5282 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5283 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5284 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5285 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5286 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5287 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5288 } 5289 #endif 5290 5291 /* assemble new additional is (if any) */ 5292 if (nis) { 5293 PetscInt **temp_idxs,*count_is,j,psum; 5294 5295 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5296 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5297 ptr_idxs = recv_buffer_idxs_is; 5298 psum = 0; 5299 for (i=0;i<n_recvs;i++) { 5300 for (j=0;j<nis;j++) { 5301 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5302 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5303 psum += plen; 5304 ptr_idxs += plen+1; /* shift pointer to received data */ 5305 } 5306 } 5307 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5308 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5309 for (i=1;i<nis;i++) { 5310 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5311 } 5312 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5313 ptr_idxs = recv_buffer_idxs_is; 5314 for (i=0;i<n_recvs;i++) { 5315 for (j=0;j<nis;j++) { 5316 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5317 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5318 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5319 ptr_idxs += plen+1; /* shift pointer to received data */ 5320 } 5321 } 5322 for (i=0;i<nis;i++) { 5323 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5324 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5325 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5326 } 5327 ierr = PetscFree(count_is);CHKERRQ(ierr); 5328 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5329 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5330 } 5331 /* free workspace */ 5332 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5333 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5334 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5335 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5336 if (isdense) { 5337 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5338 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5339 } else { 5340 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5341 } 5342 if (nis) { 5343 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5344 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5345 } 5346 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5347 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5348 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5349 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5350 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5351 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5352 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5353 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5354 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5355 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5356 ierr = PetscFree(onodes);CHKERRQ(ierr); 5357 if (nis) { 5358 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5359 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5360 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5361 } 5362 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5363 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5364 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5365 for (i=0;i<nis;i++) { 5366 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5367 } 5368 *mat_n = NULL; 5369 } 5370 PetscFunctionReturn(0); 5371 } 5372 5373 /* temporary hack into ksp private data structure */ 5374 #include <petsc/private/kspimpl.h> 5375 5376 #undef __FUNCT__ 5377 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5378 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5379 { 5380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5381 PC_IS *pcis = (PC_IS*)pc->data; 5382 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5383 MatNullSpace CoarseNullSpace=NULL; 5384 ISLocalToGlobalMapping coarse_islg; 5385 IS coarse_is,*isarray; 5386 PetscInt i,im_active=-1,active_procs=-1; 5387 PetscInt nis,nisdofs,nisneu,nisvert; 5388 PC pc_temp; 5389 PCType coarse_pc_type; 5390 KSPType coarse_ksp_type; 5391 PetscBool multilevel_requested,multilevel_allowed; 5392 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5393 Mat t_coarse_mat_is; 5394 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5395 PetscMPIInt all_procs; 5396 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5397 PetscBool compute_vecs = PETSC_FALSE; 5398 PetscScalar *array; 5399 PetscErrorCode ierr; 5400 5401 PetscFunctionBegin; 5402 /* Assign global numbering to coarse dofs */ 5403 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5404 PetscInt ocoarse_size; 5405 compute_vecs = PETSC_TRUE; 5406 ocoarse_size = pcbddc->coarse_size; 5407 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5408 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5409 /* see if we can avoid some work */ 5410 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5411 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5412 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5413 PC pc; 5414 PetscBool isbddc; 5415 5416 /* temporary workaround since PCBDDC does not have a reset method so far */ 5417 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5418 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5419 if (isbddc) { 5420 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5421 } else { 5422 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5423 } 5424 coarse_reuse = PETSC_FALSE; 5425 } else { /* we can safely reuse already computed coarse matrix */ 5426 coarse_reuse = PETSC_TRUE; 5427 } 5428 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5429 coarse_reuse = PETSC_FALSE; 5430 } 5431 /* reset any subassembling information */ 5432 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5433 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5434 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5435 coarse_reuse = PETSC_TRUE; 5436 } 5437 5438 /* count "active" (i.e. with positive local size) and "void" processes */ 5439 im_active = !!(pcis->n); 5440 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5441 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5442 void_procs = all_procs-active_procs; 5443 csin_type_simple = PETSC_TRUE; 5444 redist = PETSC_FALSE; 5445 if (pcbddc->current_level && void_procs) { 5446 csin_ml = PETSC_TRUE; 5447 ncoarse_ml = void_procs; 5448 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5449 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5450 csin_ds = PETSC_TRUE; 5451 ncoarse_ds = pcbddc->redistribute_coarse; 5452 redist = PETSC_TRUE; 5453 } else { 5454 csin_ds = PETSC_TRUE; 5455 ncoarse_ds = active_procs; 5456 redist = PETSC_TRUE; 5457 } 5458 } else { 5459 csin_ml = PETSC_FALSE; 5460 ncoarse_ml = all_procs; 5461 if (void_procs) { 5462 csin_ml = PETSC_TRUE; 5463 ncoarse_ml = void_procs; 5464 csin_ds = PETSC_TRUE; 5465 ncoarse_ds = void_procs; 5466 csin_type_simple = PETSC_FALSE; 5467 } else { 5468 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5469 csin_ds = PETSC_TRUE; 5470 ncoarse_ds = pcbddc->redistribute_coarse; 5471 redist = PETSC_TRUE; 5472 } else { 5473 csin_ds = PETSC_FALSE; 5474 ncoarse_ds = all_procs; 5475 } 5476 } 5477 } 5478 5479 /* 5480 test if we can go multilevel: three conditions must be satisfied: 5481 - we have not exceeded the number of levels requested 5482 - we can actually subassemble the active processes 5483 - we can find a suitable number of MPI processes where we can place the subassembled problem 5484 */ 5485 multilevel_allowed = PETSC_FALSE; 5486 multilevel_requested = PETSC_FALSE; 5487 if (pcbddc->current_level < pcbddc->max_levels) { 5488 multilevel_requested = PETSC_TRUE; 5489 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5490 multilevel_allowed = PETSC_FALSE; 5491 } else { 5492 multilevel_allowed = PETSC_TRUE; 5493 } 5494 } 5495 /* determine number of process partecipating to coarse solver */ 5496 if (multilevel_allowed) { 5497 ncoarse = ncoarse_ml; 5498 csin = csin_ml; 5499 redist = PETSC_FALSE; 5500 csin_type_simple = PETSC_TRUE; 5501 } else { 5502 ncoarse = ncoarse_ds; 5503 csin = csin_ds; 5504 } 5505 5506 /* creates temporary l2gmap and IS for coarse indexes */ 5507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5508 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5509 5510 /* creates temporary MATIS object for coarse matrix */ 5511 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5512 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5513 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5514 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5515 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5516 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5517 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5518 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5519 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5520 5521 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5522 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5523 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5524 const PetscInt *idxs; 5525 ISLocalToGlobalMapping tmap; 5526 5527 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5528 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5529 /* allocate space for temporary storage */ 5530 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5531 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5532 /* allocate for IS array */ 5533 nisdofs = pcbddc->n_ISForDofsLocal; 5534 nisneu = !!pcbddc->NeumannBoundariesLocal; 5535 nisvert = 0; 5536 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5537 nisvert = 1; 5538 } 5539 nis = nisdofs + nisneu + nisvert; 5540 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5541 /* dofs splitting */ 5542 for (i=0;i<nisdofs;i++) { 5543 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5544 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5545 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5546 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5547 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5548 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5549 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5550 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5551 } 5552 /* neumann boundaries */ 5553 if (pcbddc->NeumannBoundariesLocal) { 5554 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5555 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5556 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5557 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5558 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5559 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5560 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5561 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5562 } 5563 /* primal vertices (benign) */ 5564 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5565 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5566 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5567 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5568 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5569 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5570 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5571 /* ierr = ISView(isarray[nis-1],0);CHKERRQ(ierr); */ 5572 } 5573 /* free memory */ 5574 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5575 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5576 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5577 } else { 5578 nis = 0; 5579 nisdofs = 0; 5580 nisneu = 0; 5581 nisvert = 0; 5582 isarray = NULL; 5583 } 5584 /* destroy no longer needed map */ 5585 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5586 5587 /* restrict on coarse candidates (if needed) */ 5588 coarse_mat_is = NULL; 5589 if (csin) { 5590 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5591 if (redist) { 5592 PetscMPIInt rank; 5593 PetscInt spc,n_spc_p1,dest[1],destsize; 5594 5595 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5596 spc = active_procs/ncoarse; 5597 n_spc_p1 = active_procs%ncoarse; 5598 if (im_active) { 5599 destsize = 1; 5600 if (rank > n_spc_p1*(spc+1)-1) { 5601 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5602 } else { 5603 dest[0] = rank/(spc+1); 5604 } 5605 } else { 5606 destsize = 0; 5607 } 5608 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5609 } else if (csin_type_simple) { 5610 PetscMPIInt rank; 5611 PetscInt issize,isidx; 5612 5613 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5614 if (im_active) { 5615 issize = 1; 5616 isidx = (PetscInt)rank; 5617 } else { 5618 issize = 0; 5619 isidx = -1; 5620 } 5621 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5622 } else { /* get a suitable subassembling pattern from MATIS code */ 5623 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5624 } 5625 5626 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5627 if (!redist || ncoarse <= void_procs) { 5628 PetscInt ncoarse_cand,tissize,*nisindices; 5629 PetscInt *coarse_candidates; 5630 const PetscInt* tisindices; 5631 5632 /* get coarse candidates' ranks in pc communicator */ 5633 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5634 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5635 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5636 if (!coarse_candidates[i]) { 5637 coarse_candidates[ncoarse_cand++]=i; 5638 } 5639 } 5640 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5641 5642 5643 if (pcbddc->dbg_flag) { 5644 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5645 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5646 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5647 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5648 for (i=0;i<ncoarse_cand;i++) { 5649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5650 } 5651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5653 } 5654 /* shift the pattern on coarse candidates */ 5655 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5656 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5657 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5658 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5659 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5660 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5661 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5662 } 5663 if (pcbddc->dbg_flag) { 5664 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5665 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5666 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5668 } 5669 } 5670 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5671 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5672 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5673 } else { /* this is the last level, so use just receiving processes in subcomm */ 5674 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5675 } 5676 } else { 5677 if (pcbddc->dbg_flag) { 5678 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5679 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5681 } 5682 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5683 coarse_mat_is = t_coarse_mat_is; 5684 } 5685 5686 /* create local to global scatters for coarse problem */ 5687 if (compute_vecs) { 5688 PetscInt lrows; 5689 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5690 if (coarse_mat_is) { 5691 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5692 } else { 5693 lrows = 0; 5694 } 5695 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5696 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5697 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5698 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5699 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5700 } 5701 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5702 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5703 5704 /* set defaults for coarse KSP and PC */ 5705 if (multilevel_allowed) { 5706 coarse_ksp_type = KSPRICHARDSON; 5707 coarse_pc_type = PCBDDC; 5708 } else { 5709 coarse_ksp_type = KSPPREONLY; 5710 coarse_pc_type = PCREDUNDANT; 5711 } 5712 5713 /* print some info if requested */ 5714 if (pcbddc->dbg_flag) { 5715 if (!multilevel_allowed) { 5716 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5717 if (multilevel_requested) { 5718 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5719 } else if (pcbddc->max_levels) { 5720 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5721 } 5722 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5723 } 5724 } 5725 5726 /* create the coarse KSP object only once with defaults */ 5727 if (coarse_mat_is) { 5728 MatReuse coarse_mat_reuse; 5729 PetscViewer dbg_viewer = NULL; 5730 if (pcbddc->dbg_flag) { 5731 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5732 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5733 } 5734 if (!pcbddc->coarse_ksp) { 5735 char prefix[256],str_level[16]; 5736 size_t len; 5737 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5738 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5739 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5740 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5741 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5742 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5743 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5744 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5745 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5746 /* prefix */ 5747 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5748 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5749 if (!pcbddc->current_level) { 5750 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5751 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5752 } else { 5753 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5754 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5755 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5756 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5757 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5758 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5759 } 5760 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5761 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5762 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5763 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5764 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5765 /* allow user customization */ 5766 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5767 } 5768 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5769 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5770 if (nisdofs) { 5771 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5772 for (i=0;i<nisdofs;i++) { 5773 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5774 } 5775 } 5776 if (nisneu) { 5777 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5778 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5779 } 5780 if (nisvert) { 5781 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5782 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5783 } 5784 5785 /* get some info after set from options */ 5786 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5787 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5788 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5789 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5790 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5791 isbddc = PETSC_FALSE; 5792 } 5793 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5794 if (isredundant) { 5795 KSP inner_ksp; 5796 PC inner_pc; 5797 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5798 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5799 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5800 } 5801 5802 /* assemble coarse matrix */ 5803 if (coarse_reuse) { 5804 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5805 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5806 coarse_mat_reuse = MAT_REUSE_MATRIX; 5807 } else { 5808 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5809 } 5810 if (isbddc || isnn) { 5811 if (isbddc) { /* currently there are no APIs for these options */ 5812 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5813 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5814 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5815 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5816 if (pcbddc_coarse->benign_compute_nonetflux) { 5817 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5818 } 5819 } 5820 if (pcbddc->coarsening_ratio > 1) { 5821 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5822 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5823 if (pcbddc->dbg_flag) { 5824 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5825 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5826 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5827 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5828 } 5829 } 5830 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5831 } else { 5832 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5833 coarse_mat = coarse_mat_is; 5834 } 5835 } else { 5836 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5837 } 5838 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5839 5840 /* propagate symmetry info of coarse matrix */ 5841 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5842 if (pc->pmat->symmetric_set) { 5843 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5844 } 5845 if (pc->pmat->hermitian_set) { 5846 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5847 } 5848 if (pc->pmat->spd_set) { 5849 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5850 } 5851 /* set operators */ 5852 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5853 if (pcbddc->dbg_flag) { 5854 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5855 } 5856 } else { /* processes non partecipating to coarse solver (if any) */ 5857 coarse_mat = 0; 5858 } 5859 ierr = PetscFree(isarray);CHKERRQ(ierr); 5860 #if 0 5861 { 5862 PetscViewer viewer; 5863 char filename[256]; 5864 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5865 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5866 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5867 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5868 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5869 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5870 } 5871 #endif 5872 5873 /* Compute coarse null space (special handling by BDDC only) */ 5874 #if 0 5875 if (pcbddc->NullSpace) { 5876 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5877 } 5878 #endif 5879 /* hack */ 5880 if (pcbddc->coarse_ksp) { 5881 Vec crhs,csol; 5882 5883 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5884 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5885 if (!csol) { 5886 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5887 } 5888 if (!crhs) { 5889 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5890 } 5891 } 5892 5893 /* compute null space for coarse solver if the benign trick has been requested */ 5894 if (pcbddc->benign_null) { 5895 5896 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5897 for (i=0;i<pcbddc->benign_n;i++) { 5898 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5899 } 5900 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5901 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5902 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5903 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5904 if (coarse_mat) { 5905 Vec nullv; 5906 PetscScalar *array,*array2; 5907 PetscInt nl; 5908 5909 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5910 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5911 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5912 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5913 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5914 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5915 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5916 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5917 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5918 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5919 } 5920 } 5921 5922 if (pcbddc->coarse_ksp) { 5923 PetscBool ispreonly; 5924 5925 if (CoarseNullSpace) { 5926 PetscBool isnull; 5927 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5928 if (0) { 5929 if (isbddc && !pcbddc->benign_saddle_point) { 5930 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5931 } else { 5932 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5933 } 5934 } else { 5935 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5936 } 5937 } 5938 /* setup coarse ksp */ 5939 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5940 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5941 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5942 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5943 KSP check_ksp; 5944 KSPType check_ksp_type; 5945 PC check_pc; 5946 Vec check_vec,coarse_vec; 5947 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5948 PetscInt its; 5949 PetscBool compute_eigs; 5950 PetscReal *eigs_r,*eigs_c; 5951 PetscInt neigs; 5952 const char *prefix; 5953 5954 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5955 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5956 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5957 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5958 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5959 if (ispreonly) { 5960 check_ksp_type = KSPPREONLY; 5961 compute_eigs = PETSC_FALSE; 5962 } else { 5963 check_ksp_type = KSPGMRES; 5964 compute_eigs = PETSC_TRUE; 5965 } 5966 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5967 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5968 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5969 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5970 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5971 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5972 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5973 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5974 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5975 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5976 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5977 /* create random vec */ 5978 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5979 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5980 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5981 if (CoarseNullSpace) { 5982 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5983 } 5984 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5985 /* solve coarse problem */ 5986 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5987 if (CoarseNullSpace) { 5988 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5989 } 5990 /* set eigenvalue estimation if preonly has not been requested */ 5991 if (compute_eigs) { 5992 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5993 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5994 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5995 lambda_max = eigs_r[neigs-1]; 5996 lambda_min = eigs_r[0]; 5997 if (pcbddc->use_coarse_estimates) { 5998 if (lambda_max>lambda_min) { 5999 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 6000 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6001 } 6002 } 6003 } 6004 6005 /* check coarse problem residual error */ 6006 if (pcbddc->dbg_flag) { 6007 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6008 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6009 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6010 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6011 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6012 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6013 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6014 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6015 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6016 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6017 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6018 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6019 if (CoarseNullSpace) { 6020 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6021 } 6022 if (compute_eigs) { 6023 PetscReal lambda_max_s,lambda_min_s; 6024 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6025 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6026 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6027 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6028 for (i=0;i<neigs;i++) { 6029 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6030 } 6031 } 6032 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6033 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6034 } 6035 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6036 if (compute_eigs) { 6037 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6038 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6039 } 6040 } 6041 } 6042 /* print additional info */ 6043 if (pcbddc->dbg_flag) { 6044 /* waits until all processes reaches this point */ 6045 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6048 } 6049 6050 /* free memory */ 6051 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6052 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6053 PetscFunctionReturn(0); 6054 } 6055 6056 #undef __FUNCT__ 6057 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6058 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6059 { 6060 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6061 PC_IS* pcis = (PC_IS*)pc->data; 6062 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6063 IS subset,subset_mult,subset_n; 6064 PetscInt local_size,coarse_size=0; 6065 PetscInt *local_primal_indices=NULL; 6066 const PetscInt *t_local_primal_indices; 6067 PetscErrorCode ierr; 6068 6069 PetscFunctionBegin; 6070 /* Compute global number of coarse dofs */ 6071 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6073 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6074 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6075 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6076 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6077 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6078 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6079 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6080 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6081 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6082 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6083 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6084 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6085 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6086 6087 /* check numbering */ 6088 if (pcbddc->dbg_flag) { 6089 PetscScalar coarsesum,*array,*array2; 6090 PetscInt i; 6091 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6092 6093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6096 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6097 /* counter */ 6098 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6099 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6100 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6101 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6102 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6103 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6104 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6105 for (i=0;i<pcbddc->local_primal_size;i++) { 6106 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6107 } 6108 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6109 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6110 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6111 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6112 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6113 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6114 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6115 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6116 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6117 for (i=0;i<pcis->n;i++) { 6118 if (array[i] != 0.0 && array[i] != array2[i]) { 6119 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6120 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6121 set_error = PETSC_TRUE; 6122 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6123 } 6124 } 6125 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6126 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6127 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6128 for (i=0;i<pcis->n;i++) { 6129 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6130 } 6131 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6132 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6133 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6134 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6135 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6136 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6137 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6138 PetscInt *gidxs; 6139 6140 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6141 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6142 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6145 for (i=0;i<pcbddc->local_primal_size;i++) { 6146 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6147 } 6148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6149 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6150 } 6151 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6152 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6153 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6154 } 6155 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6156 /* get back data */ 6157 *coarse_size_n = coarse_size; 6158 *local_primal_indices_n = local_primal_indices; 6159 PetscFunctionReturn(0); 6160 } 6161 6162 #undef __FUNCT__ 6163 #define __FUNCT__ "PCBDDCGlobalToLocal" 6164 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6165 { 6166 IS localis_t; 6167 PetscInt i,lsize,*idxs,n; 6168 PetscScalar *vals; 6169 PetscErrorCode ierr; 6170 6171 PetscFunctionBegin; 6172 /* get indices in local ordering exploiting local to global map */ 6173 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6174 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6175 for (i=0;i<lsize;i++) vals[i] = 1.0; 6176 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6177 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6178 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6179 if (idxs) { /* multilevel guard */ 6180 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6181 } 6182 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6183 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6184 ierr = PetscFree(vals);CHKERRQ(ierr); 6185 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6186 /* now compute set in local ordering */ 6187 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6188 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6189 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6190 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6191 for (i=0,lsize=0;i<n;i++) { 6192 if (PetscRealPart(vals[i]) > 0.5) { 6193 lsize++; 6194 } 6195 } 6196 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6197 for (i=0,lsize=0;i<n;i++) { 6198 if (PetscRealPart(vals[i]) > 0.5) { 6199 idxs[lsize++] = i; 6200 } 6201 } 6202 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6203 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6204 *localis = localis_t; 6205 PetscFunctionReturn(0); 6206 } 6207 6208 #undef __FUNCT__ 6209 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6210 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6211 { 6212 PC_IS *pcis=(PC_IS*)pc->data; 6213 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6214 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6215 Mat S_j; 6216 PetscInt *used_xadj,*used_adjncy; 6217 PetscBool free_used_adj; 6218 PetscErrorCode ierr; 6219 6220 PetscFunctionBegin; 6221 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6222 free_used_adj = PETSC_FALSE; 6223 if (pcbddc->sub_schurs_layers == -1) { 6224 used_xadj = NULL; 6225 used_adjncy = NULL; 6226 } else { 6227 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6228 used_xadj = pcbddc->mat_graph->xadj; 6229 used_adjncy = pcbddc->mat_graph->adjncy; 6230 } else if (pcbddc->computed_rowadj) { 6231 used_xadj = pcbddc->mat_graph->xadj; 6232 used_adjncy = pcbddc->mat_graph->adjncy; 6233 } else { 6234 PetscBool flg_row=PETSC_FALSE; 6235 const PetscInt *xadj,*adjncy; 6236 PetscInt nvtxs; 6237 6238 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6239 if (flg_row) { 6240 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6241 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6242 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6243 free_used_adj = PETSC_TRUE; 6244 } else { 6245 pcbddc->sub_schurs_layers = -1; 6246 used_xadj = NULL; 6247 used_adjncy = NULL; 6248 } 6249 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6250 } 6251 } 6252 6253 /* setup sub_schurs data */ 6254 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6255 if (!sub_schurs->schur_explicit) { 6256 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6257 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6258 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6259 } else { 6260 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6261 PetscBool isseqaij,need_change = PETSC_FALSE;; 6262 PetscInt benign_n; 6263 Mat change = NULL; 6264 Vec scaling = NULL; 6265 IS change_primal = NULL; 6266 6267 if (!pcbddc->use_vertices && reuse_solvers) { 6268 PetscInt n_vertices; 6269 6270 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6271 reuse_solvers = (PetscBool)!n_vertices; 6272 } 6273 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6274 if (!isseqaij) { 6275 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6276 if (matis->A == pcbddc->local_mat) { 6277 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6278 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6279 } else { 6280 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6281 } 6282 } 6283 if (!pcbddc->benign_change_explicit) { 6284 benign_n = pcbddc->benign_n; 6285 } else { 6286 benign_n = 0; 6287 } 6288 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6289 We need a global reduction to avoid possible deadlocks. 6290 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6291 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6292 PetscBool have_loc_change = !!(sub_schurs->change); 6293 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6294 need_change = !need_change; 6295 } 6296 /* If the user defines additional constraints, we import them here. 6297 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6298 if (need_change) { 6299 PC_IS *pcisf; 6300 PC_BDDC *pcbddcf; 6301 PC pcf; 6302 6303 if (pcbddc->sub_schurs_rebuild) { 6304 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6305 } 6306 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6307 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6308 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6309 /* hacks */ 6310 pcisf = (PC_IS*)pcf->data; 6311 pcisf->is_B_local = pcis->is_B_local; 6312 pcisf->vec1_N = pcis->vec1_N; 6313 pcisf->BtoNmap = pcis->BtoNmap; 6314 pcisf->n = pcis->n; 6315 pcisf->n_B = pcis->n_B; 6316 pcbddcf = (PC_BDDC*)pcf->data; 6317 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6318 pcbddcf->mat_graph = pcbddc->mat_graph; 6319 pcbddcf->use_faces = PETSC_TRUE; 6320 pcbddcf->use_change_of_basis = PETSC_TRUE; 6321 pcbddcf->use_change_on_faces = PETSC_TRUE; 6322 pcbddcf->use_qr_single = PETSC_TRUE; 6323 pcbddcf->fake_change = PETSC_TRUE; 6324 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6325 /* store information on primal vertices and change of basis (in local numbering) */ 6326 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6327 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6328 change = pcbddcf->ConstraintMatrix; 6329 pcbddcf->ConstraintMatrix = NULL; 6330 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6331 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6332 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6333 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6334 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6335 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6336 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6337 pcf->ops->destroy = NULL; 6338 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6339 } 6340 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6341 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6342 ierr = MatDestroy(&change);CHKERRQ(ierr); 6343 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6344 } 6345 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6346 6347 /* free adjacency */ 6348 if (free_used_adj) { 6349 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6350 } 6351 PetscFunctionReturn(0); 6352 } 6353 6354 #undef __FUNCT__ 6355 #define __FUNCT__ "PCBDDCInitSubSchurs" 6356 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6357 { 6358 PC_IS *pcis=(PC_IS*)pc->data; 6359 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6360 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6361 PCBDDCGraph graph; 6362 PetscErrorCode ierr; 6363 6364 PetscFunctionBegin; 6365 /* attach interface graph for determining subsets */ 6366 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6367 IS verticesIS,verticescomm; 6368 PetscInt vsize,*idxs; 6369 6370 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6371 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6372 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6374 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6375 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6376 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6377 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6378 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6379 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6380 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6381 /* 6382 if (pcbddc->dbg_flag) { 6383 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6384 } 6385 */ 6386 } else { 6387 graph = pcbddc->mat_graph; 6388 } 6389 6390 /* sub_schurs init */ 6391 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6392 6393 /* free graph struct */ 6394 if (pcbddc->sub_schurs_rebuild) { 6395 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6396 } 6397 PetscFunctionReturn(0); 6398 } 6399 6400 #undef __FUNCT__ 6401 #define __FUNCT__ "PCBDDCCheckOperator" 6402 PetscErrorCode PCBDDCCheckOperator(PC pc) 6403 { 6404 PC_IS *pcis=(PC_IS*)pc->data; 6405 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6406 PetscErrorCode ierr; 6407 6408 PetscFunctionBegin; 6409 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6410 IS zerodiag = NULL; 6411 Mat S_j,B0_B=NULL; 6412 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6413 PetscScalar *p0_check,*array,*array2; 6414 PetscReal norm; 6415 PetscInt i; 6416 6417 /* B0 and B0_B */ 6418 if (zerodiag) { 6419 IS dummy; 6420 6421 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6422 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6423 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6424 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6425 } 6426 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6427 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6428 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6429 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6430 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6431 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6432 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6433 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6434 /* S_j */ 6435 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6436 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6437 6438 /* mimic vector in \widetilde{W}_\Gamma */ 6439 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6440 /* continuous in primal space */ 6441 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6442 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6443 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6444 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6445 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6446 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6447 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6448 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6449 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6450 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6451 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6452 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6453 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6454 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6455 6456 /* assemble rhs for coarse problem */ 6457 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6458 /* local with Schur */ 6459 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6460 if (zerodiag) { 6461 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6462 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6463 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6464 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6465 } 6466 /* sum on primal nodes the local contributions */ 6467 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6468 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6469 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6470 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6471 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6472 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6473 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6474 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6475 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6476 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6477 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6478 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6479 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6480 /* scale primal nodes (BDDC sums contibutions) */ 6481 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6482 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6483 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6484 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6485 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6486 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6487 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6488 /* global: \widetilde{B0}_B w_\Gamma */ 6489 if (zerodiag) { 6490 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6491 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6493 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6494 } 6495 /* BDDC */ 6496 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6497 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6498 6499 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6500 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6501 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6502 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6503 for (i=0;i<pcbddc->benign_n;i++) { 6504 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6505 } 6506 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6507 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6508 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6509 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6510 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6511 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6512 } 6513 PetscFunctionReturn(0); 6514 } 6515