1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > 1.e-1) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 PetscInt n; 586 587 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 588 recompute_zerodiag = PETSC_FALSE; 589 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 590 if (n) { 591 has_null_pressures = PETSC_FALSE; 592 have_null = PETSC_FALSE; 593 } 594 } 595 596 /* final check for null pressures */ 597 if (zerodiag && pressures) { 598 PetscInt nz,np; 599 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 600 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 601 if (nz != np) have_null = PETSC_FALSE; 602 } 603 604 if (recompute_zerodiag) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 if (pcbddc->benign_n == 1) { 607 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 608 zerodiag = zerodiag_subs[0]; 609 } else { 610 PetscInt i,nzn,*new_idxs; 611 612 nzn = 0; 613 for (i=0;i<pcbddc->benign_n;i++) { 614 PetscInt ns; 615 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 616 nzn += ns; 617 } 618 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 619 nzn = 0; 620 for (i=0;i<pcbddc->benign_n;i++) { 621 PetscInt ns,*idxs; 622 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 623 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 624 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 625 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 626 nzn += ns; 627 } 628 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 629 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 630 } 631 have_null = PETSC_FALSE; 632 } 633 634 /* no-net-flux */ 635 if (pcbddc->benign_compute_nonetflux) { 636 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 637 MatNullSpace near_null_space; 638 ISLocalToGlobalMapping rmap; 639 Vec quad_vec; 640 PetscScalar *pvals; 641 PetscInt i,np,*dummyins; 642 643 /* create vector to hold quadrature weights */ 644 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 645 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 646 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 647 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 648 649 /* compute B^{(i)T} * 1_p */ 650 np = 0; 651 if (zerodiag) { 652 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 653 } else if (pressures) { 654 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 655 } 656 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 657 for (i=0;i<np;i++) pvals[i] = 1.; 658 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 659 if (np) { 660 const PetscInt *pidxs; 661 662 if (zerodiag) { 663 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 664 } else if (pressures) { 665 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 666 } 667 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 668 if (zerodiag) { 669 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 670 } else if (pressures) { 671 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 672 } 673 } 674 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 675 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 676 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 677 ierr = PetscFree(pvals);CHKERRQ(ierr); 678 679 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 680 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 681 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 682 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 683 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 684 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 685 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 686 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 687 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 688 for (i=0;i<matis->sf_nleaves;i++) { 689 if (PetscGlobalRank != matis->sf_leafdata[i]) { 690 pvals[i] = 0.; 691 } 692 } 693 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 694 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 695 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 696 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 697 ierr = PetscFree(dummyins);CHKERRQ(ierr); 698 699 /* assembly quadrature vec and attach near null space to pmat */ 700 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 701 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 702 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 703 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 704 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 705 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 706 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 707 } 708 709 /* change of basis and p0 dofs */ 710 if (has_null_pressures) { 711 IS zerodiagc; 712 const PetscInt *idxs,*idxsc; 713 PetscInt i,s,*nnz; 714 715 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 716 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 717 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 718 /* local change of basis for pressures */ 719 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 720 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 721 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 722 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 723 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 724 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 725 for (i=0;i<pcbddc->benign_n;i++) { 726 PetscInt nzs,j; 727 728 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 729 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 730 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 731 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 732 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 733 } 734 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 735 ierr = PetscFree(nnz);CHKERRQ(ierr); 736 /* set identity on velocities */ 737 for (i=0;i<n-nz;i++) { 738 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 741 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 742 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 743 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 744 /* set change on pressures */ 745 for (s=0;s<pcbddc->benign_n;s++) { 746 PetscScalar *array; 747 PetscInt nzs; 748 749 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 750 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 751 for (i=0;i<nzs-1;i++) { 752 PetscScalar vals[2]; 753 PetscInt cols[2]; 754 755 cols[0] = idxs[i]; 756 cols[1] = idxs[nzs-1]; 757 vals[0] = 1.; 758 vals[1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 760 } 761 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 762 for (i=0;i<nzs-1;i++) array[i] = -1.; 763 array[nzs-1] = 1.; 764 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 765 /* store local idxs for p0 */ 766 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 767 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 768 ierr = PetscFree(array);CHKERRQ(ierr); 769 } 770 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 771 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 772 /* project if needed */ 773 if (pcbddc->benign_change_explicit) { 774 Mat M; 775 776 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 778 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatDestroy(&M);CHKERRQ(ierr); 780 } 781 /* store global idxs for p0 */ 782 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 783 } 784 pcbddc->benign_zerodiag_subs = zerodiag_subs; 785 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 786 787 /* determines if the coarse solver will be singular or not */ 788 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 789 *zerodiaglocal = zerodiag; 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 795 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 796 { 797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 798 PetscScalar *array; 799 PetscErrorCode ierr; 800 801 PetscFunctionBegin; 802 if (!pcbddc->benign_sf) { 803 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 804 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 805 } 806 if (get) { 807 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 808 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 809 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 810 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 811 } else { 812 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 813 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 814 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 815 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 822 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 823 { 824 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 825 PetscErrorCode ierr; 826 827 PetscFunctionBegin; 828 /* TODO: add error checking 829 - avoid nested pop (or push) calls. 830 - cannot push before pop. 831 - cannot call this if pcbddc->local_mat is NULL 832 */ 833 if (!pcbddc->benign_n) { 834 PetscFunctionReturn(0); 835 } 836 if (pop) { 837 if (pcbddc->benign_change_explicit) { 838 IS is_p0; 839 MatReuse reuse; 840 841 /* extract B_0 */ 842 reuse = MAT_INITIAL_MATRIX; 843 if (pcbddc->benign_B0) { 844 reuse = MAT_REUSE_MATRIX; 845 } 846 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 847 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 848 /* remove rows and cols from local problem */ 849 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 851 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 852 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 853 } else { 854 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 855 PetscScalar *vals; 856 PetscInt i,n,*idxs_ins; 857 858 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 859 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 860 if (!pcbddc->benign_B0) { 861 PetscInt *nnz; 862 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 863 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 864 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 865 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 866 for (i=0;i<pcbddc->benign_n;i++) { 867 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 868 nnz[i] = n - nnz[i]; 869 } 870 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 871 ierr = PetscFree(nnz);CHKERRQ(ierr); 872 } 873 874 for (i=0;i<pcbddc->benign_n;i++) { 875 PetscScalar *array; 876 PetscInt *idxs,j,nz,cum; 877 878 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 879 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 880 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 881 for (j=0;j<nz;j++) vals[j] = 1.; 882 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 883 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 884 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 885 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 886 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 887 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 888 cum = 0; 889 for (j=0;j<n;j++) { 890 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 891 vals[cum] = array[j]; 892 idxs_ins[cum] = j; 893 cum++; 894 } 895 } 896 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 897 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 898 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 899 } 900 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 901 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 902 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 903 } 904 } else { /* push */ 905 if (pcbddc->benign_change_explicit) { 906 PetscInt i; 907 908 for (i=0;i<pcbddc->benign_n;i++) { 909 PetscScalar *B0_vals; 910 PetscInt *B0_cols,B0_ncol; 911 912 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 913 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 914 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 915 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 916 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 917 } 918 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 } else { 921 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 922 } 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "PCBDDCAdaptiveSelection" 929 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 930 { 931 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 932 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 933 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 934 PetscBLASInt *B_iwork,*B_ifail; 935 PetscScalar *work,lwork; 936 PetscScalar *St,*S,*eigv; 937 PetscScalar *Sarray,*Starray; 938 PetscReal *eigs,thresh; 939 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 940 PetscBool allocated_S_St; 941 #if defined(PETSC_USE_COMPLEX) 942 PetscReal *rwork; 943 #endif 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 948 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 949 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 955 } 956 957 if (pcbddc->dbg_flag) { 958 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 959 } 960 961 /* max size of subsets */ 962 mss = 0; 963 for (i=0;i<sub_schurs->n_subs;i++) { 964 PetscInt subset_size; 965 966 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 967 mss = PetscMax(mss,subset_size); 968 } 969 970 /* min/max and threshold */ 971 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 972 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 973 nmax = PetscMax(nmin,nmax); 974 allocated_S_St = PETSC_FALSE; 975 if (nmin) { 976 allocated_S_St = PETSC_TRUE; 977 } 978 979 /* allocate lapack workspace */ 980 cum = cum2 = 0; 981 maxneigs = 0; 982 for (i=0;i<sub_schurs->n_subs;i++) { 983 PetscInt n,subset_size; 984 985 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 986 n = PetscMin(subset_size,nmax); 987 cum += subset_size; 988 cum2 += subset_size*n; 989 maxneigs = PetscMax(maxneigs,n); 990 } 991 if (mss) { 992 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 993 PetscBLASInt B_itype = 1; 994 PetscBLASInt B_N = mss; 995 PetscReal zero = 0.0; 996 PetscReal eps = 0.0; /* dlamch? */ 997 998 B_lwork = -1; 999 S = NULL; 1000 St = NULL; 1001 eigs = NULL; 1002 eigv = NULL; 1003 B_iwork = NULL; 1004 B_ifail = NULL; 1005 #if defined(PETSC_USE_COMPLEX) 1006 rwork = NULL; 1007 #endif 1008 thresh = 1.0; 1009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1010 #if defined(PETSC_USE_COMPLEX) 1011 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1012 #else 1013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1014 #endif 1015 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1016 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1017 } else { 1018 /* TODO */ 1019 } 1020 } else { 1021 lwork = 0; 1022 } 1023 1024 nv = 0; 1025 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1026 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1027 } 1028 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1029 if (allocated_S_St) { 1030 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1031 } 1032 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_COMPLEX) 1034 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1035 #endif 1036 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1037 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1039 nv+cum,&pcbddc->adaptive_constraints_idxs, 1040 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1041 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1042 1043 maxneigs = 0; 1044 cum = cumarray = 0; 1045 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1046 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1047 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1048 const PetscInt *idxs; 1049 1050 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1051 for (cum=0;cum<nv;cum++) { 1052 pcbddc->adaptive_constraints_n[cum] = 1; 1053 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1054 pcbddc->adaptive_constraints_data[cum] = 1.0; 1055 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1056 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1057 } 1058 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1059 } 1060 1061 if (mss) { /* multilevel */ 1062 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1064 } 1065 1066 thresh = pcbddc->adaptive_threshold; 1067 for (i=0;i<sub_schurs->n_subs;i++) { 1068 const PetscInt *idxs; 1069 PetscReal upper,lower; 1070 PetscInt j,subset_size,eigs_start = 0; 1071 PetscBLASInt B_N; 1072 PetscBool same_data = PETSC_FALSE; 1073 1074 if (pcbddc->use_deluxe_scaling) { 1075 upper = PETSC_MAX_REAL; 1076 lower = thresh; 1077 } else { 1078 upper = 1./thresh; 1079 lower = 0.; 1080 } 1081 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1082 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1083 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1084 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1085 if (sub_schurs->is_hermitian) { 1086 PetscInt j,k; 1087 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1088 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1089 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1090 } 1091 for (j=0;j<subset_size;j++) { 1092 for (k=j;k<subset_size;k++) { 1093 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1094 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1095 } 1096 } 1097 } else { 1098 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1099 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 } 1101 } else { 1102 S = Sarray + cumarray; 1103 St = Starray + cumarray; 1104 } 1105 /* see if we can save some work */ 1106 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1107 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1108 } 1109 1110 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1111 B_neigs = 0; 1112 } else { 1113 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1114 PetscBLASInt B_itype = 1; 1115 PetscBLASInt B_IL, B_IU; 1116 PetscReal eps = -1.0; /* dlamch? */ 1117 PetscInt nmin_s; 1118 PetscBool compute_range = PETSC_FALSE; 1119 1120 if (pcbddc->dbg_flag) { 1121 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1122 } 1123 1124 compute_range = PETSC_FALSE; 1125 if (thresh > 1.+PETSC_SMALL && !same_data) { 1126 compute_range = PETSC_TRUE; 1127 } 1128 1129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1130 if (compute_range) { 1131 1132 /* ask for eigenvalues larger than thresh */ 1133 #if defined(PETSC_USE_COMPLEX) 1134 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1135 #else 1136 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1137 #endif 1138 } else if (!same_data) { 1139 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1140 B_IL = 1; 1141 #if defined(PETSC_USE_COMPLEX) 1142 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1143 #else 1144 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1145 #endif 1146 } else { /* same_data is true, so get the adaptive function requested by the user */ 1147 PetscInt k; 1148 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1149 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1150 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1151 nmin = nmax; 1152 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1153 for (k=0;k<nmax;k++) { 1154 eigs[k] = 1./PETSC_SMALL; 1155 eigv[k*(subset_size+1)] = 1.0; 1156 } 1157 } 1158 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1159 if (B_ierr) { 1160 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1161 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1162 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1163 } 1164 1165 if (B_neigs > nmax) { 1166 if (pcbddc->dbg_flag) { 1167 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1168 } 1169 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1170 B_neigs = nmax; 1171 } 1172 1173 nmin_s = PetscMin(nmin,B_N); 1174 if (B_neigs < nmin_s) { 1175 PetscBLASInt B_neigs2; 1176 1177 if (pcbddc->use_deluxe_scaling) { 1178 B_IL = B_N - nmin_s + 1; 1179 B_IU = B_N - B_neigs; 1180 } else { 1181 B_IL = B_neigs + 1; 1182 B_IU = nmin_s; 1183 } 1184 if (pcbddc->dbg_flag) { 1185 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1186 } 1187 if (sub_schurs->is_hermitian) { 1188 PetscInt j; 1189 for (j=0;j<subset_size;j++) { 1190 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 for (j=0;j<subset_size;j++) { 1193 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1194 } 1195 } else { 1196 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1197 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1198 } 1199 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1200 #if defined(PETSC_USE_COMPLEX) 1201 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1202 #else 1203 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1204 #endif 1205 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1206 B_neigs += B_neigs2; 1207 } 1208 if (B_ierr) { 1209 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1210 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1211 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1212 } 1213 if (pcbddc->dbg_flag) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1215 for (j=0;j<B_neigs;j++) { 1216 if (eigs[j] == 0.0) { 1217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1218 } else { 1219 if (pcbddc->use_deluxe_scaling) { 1220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1221 } else { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1223 } 1224 } 1225 } 1226 } 1227 } else { 1228 /* TODO */ 1229 } 1230 } 1231 /* change the basis back to the original one */ 1232 if (sub_schurs->change) { 1233 Mat change,phi,phit; 1234 1235 if (pcbddc->dbg_flag > 1) { 1236 PetscInt ii; 1237 for (ii=0;ii<B_neigs;ii++) { 1238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1239 for (j=0;j<B_N;j++) { 1240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1245 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1246 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1247 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1248 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1249 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1250 } 1251 maxneigs = PetscMax(B_neigs,maxneigs); 1252 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1253 if (B_neigs) { 1254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1255 1256 if (pcbddc->dbg_flag > 1) { 1257 PetscInt ii; 1258 for (ii=0;ii<B_neigs;ii++) { 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1260 for (j=0;j<B_N;j++) { 1261 #if defined(PETSC_USE_COMPLEX) 1262 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1263 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1267 #endif 1268 } 1269 } 1270 } 1271 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1272 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1273 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1274 cum++; 1275 } 1276 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1277 /* shift for next computation */ 1278 cumarray += subset_size*subset_size; 1279 } 1280 if (pcbddc->dbg_flag) { 1281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1282 } 1283 1284 if (mss) { 1285 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1287 /* destroy matrices (junk) */ 1288 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1289 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1290 } 1291 if (allocated_S_St) { 1292 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1293 } 1294 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1295 #if defined(PETSC_USE_COMPLEX) 1296 ierr = PetscFree(rwork);CHKERRQ(ierr); 1297 #endif 1298 if (pcbddc->dbg_flag) { 1299 PetscInt maxneigs_r; 1300 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1301 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "PCBDDCSetUpSolvers" 1308 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1309 { 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PetscScalar *coarse_submat_vals; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1316 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1317 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1318 1319 /* Setup local neumann solver ksp_R */ 1320 /* PCBDDCSetUpLocalScatters should be called first! */ 1321 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1322 1323 /* Change global null space passed in by the user if change of basis has been requested */ 1324 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1325 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1326 } 1327 1328 /* 1329 Setup local correction and local part of coarse basis. 1330 Gives back the dense local part of the coarse matrix in column major ordering 1331 */ 1332 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1333 1334 /* Compute total number of coarse nodes and setup coarse solver */ 1335 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1336 1337 /* free */ 1338 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "PCBDDCResetCustomization" 1344 PetscErrorCode PCBDDCResetCustomization(PC pc) 1345 { 1346 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1351 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1353 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1354 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1357 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1358 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1359 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1360 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1361 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "PCBDDCResetTopography" 1367 PetscErrorCode PCBDDCResetTopography(PC pc) 1368 { 1369 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1370 PetscInt i; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1375 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1376 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1377 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1379 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1380 for (i=0;i<pcbddc->n_local_subs;i++) { 1381 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1384 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCResetSolvers" 1390 PetscErrorCode PCBDDCResetSolvers(PC pc) 1391 { 1392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1397 if (pcbddc->coarse_phi_B) { 1398 PetscScalar *array; 1399 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1400 ierr = PetscFree(array);CHKERRQ(ierr); 1401 } 1402 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1403 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1404 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1405 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1406 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1407 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1410 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1412 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1413 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1414 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1415 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1416 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1417 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1418 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1420 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1421 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1424 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1425 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1426 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1427 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1428 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1429 if (pcbddc->benign_zerodiag_subs) { 1430 PetscInt i; 1431 for (i=0;i<pcbddc->benign_n;i++) { 1432 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1435 } 1436 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1442 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1443 { 1444 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1445 PC_IS *pcis = (PC_IS*)pc->data; 1446 VecType impVecType; 1447 PetscInt n_constraints,n_R,old_size; 1448 PetscErrorCode ierr; 1449 1450 PetscFunctionBegin; 1451 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1452 /* get sizes */ 1453 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1454 n_R = pcis->n - pcbddc->n_vertices; 1455 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1456 /* local work vectors (try to avoid unneeded work)*/ 1457 /* R nodes */ 1458 old_size = -1; 1459 if (pcbddc->vec1_R) { 1460 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1461 } 1462 if (n_R != old_size) { 1463 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1464 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1465 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1466 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1467 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1468 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1469 } 1470 /* local primal dofs */ 1471 old_size = -1; 1472 if (pcbddc->vec1_P) { 1473 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1474 } 1475 if (pcbddc->local_primal_size != old_size) { 1476 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1477 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1478 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1479 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1480 } 1481 /* local explicit constraints */ 1482 old_size = -1; 1483 if (pcbddc->vec1_C) { 1484 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1485 } 1486 if (n_constraints && n_constraints != old_size) { 1487 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1488 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1489 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1490 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "PCBDDCSetUpCorrection" 1497 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1498 { 1499 PetscErrorCode ierr; 1500 /* pointers to pcis and pcbddc */ 1501 PC_IS* pcis = (PC_IS*)pc->data; 1502 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1503 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1504 /* submatrices of local problem */ 1505 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1506 /* submatrices of local coarse problem */ 1507 Mat S_VV,S_CV,S_VC,S_CC; 1508 /* working matrices */ 1509 Mat C_CR; 1510 /* additional working stuff */ 1511 PC pc_R; 1512 Mat F; 1513 Vec dummy_vec; 1514 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1515 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1516 PetscScalar *work; 1517 PetscInt *idx_V_B; 1518 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1519 PetscInt i,n_R,n_D,n_B; 1520 1521 /* some shortcuts to scalars */ 1522 PetscScalar one=1.0,m_one=-1.0; 1523 1524 PetscFunctionBegin; 1525 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1526 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1527 } 1528 1529 /* Set Non-overlapping dimensions */ 1530 n_vertices = pcbddc->n_vertices; 1531 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1532 n_B = pcis->n_B; 1533 n_D = pcis->n - n_B; 1534 n_R = pcis->n - n_vertices; 1535 1536 /* vertices in boundary numbering */ 1537 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1538 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1539 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1540 1541 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1542 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1548 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1550 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1551 1552 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1553 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1554 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1555 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1557 lda_rhs = n_R; 1558 need_benign_correction = PETSC_FALSE; 1559 if (isLU || isILU || isCHOL) { 1560 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1561 } else if (sub_schurs->reuse_solver) { 1562 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1563 MatFactorType type; 1564 1565 F = reuse_solver->F; 1566 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1567 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1568 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1569 need_benign_correction = !!reuse_solver->benign_n; 1570 } else { 1571 F = NULL; 1572 } 1573 1574 /* allocate workspace */ 1575 n = 0; 1576 if (n_constraints) { 1577 n += lda_rhs*n_constraints; 1578 } 1579 if (n_vertices) { 1580 n = PetscMax(2*lda_rhs*n_vertices,n); 1581 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1582 } 1583 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1584 1585 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1586 dummy_vec = NULL; 1587 if (need_benign_correction && lda_rhs != n_R && F) { 1588 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1589 } 1590 1591 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1592 if (n_constraints) { 1593 Mat M1,M2,M3,C_B; 1594 IS is_aux; 1595 PetscScalar *array,*array2; 1596 1597 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1598 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1599 1600 /* Extract constraints on R nodes: C_{CR} */ 1601 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1602 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1604 1605 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1606 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1607 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1608 for (i=0;i<n_constraints;i++) { 1609 const PetscScalar *row_cmat_values; 1610 const PetscInt *row_cmat_indices; 1611 PetscInt size_of_constraint,j; 1612 1613 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1614 for (j=0;j<size_of_constraint;j++) { 1615 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1616 } 1617 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1618 } 1619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1620 if (F) { 1621 Mat B; 1622 1623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1624 if (need_benign_correction) { 1625 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1626 1627 /* rhs is already zero on interior dofs, no need to change the rhs */ 1628 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1629 } 1630 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1631 if (need_benign_correction) { 1632 PetscScalar *marr; 1633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1634 1635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1636 if (lda_rhs != n_R) { 1637 for (i=0;i<n_constraints;i++) { 1638 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1639 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1640 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1641 } 1642 } else { 1643 for (i=0;i<n_constraints;i++) { 1644 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1645 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1646 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1647 } 1648 } 1649 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1650 } 1651 ierr = MatDestroy(&B);CHKERRQ(ierr); 1652 } else { 1653 PetscScalar *marr; 1654 1655 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1656 for (i=0;i<n_constraints;i++) { 1657 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1658 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1659 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1660 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1661 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1662 } 1663 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1664 } 1665 if (!pcbddc->switch_static) { 1666 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1667 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1668 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1669 for (i=0;i<n_constraints;i++) { 1670 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1671 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1672 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1675 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1676 } 1677 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1678 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1679 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1680 } else { 1681 if (lda_rhs != n_R) { 1682 IS dummy; 1683 1684 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1685 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1686 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1687 } else { 1688 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1689 pcbddc->local_auxmat2 = local_auxmat2_R; 1690 } 1691 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1692 } 1693 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1694 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1695 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1696 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1697 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1698 if (isCHOL) { 1699 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1700 } else { 1701 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1702 } 1703 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1704 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1705 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1706 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1707 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1708 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1709 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1710 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1711 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1712 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1713 } 1714 1715 /* Get submatrices from subdomain matrix */ 1716 if (n_vertices) { 1717 IS is_aux; 1718 1719 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1720 IS tis; 1721 1722 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1723 ierr = ISSort(tis);CHKERRQ(ierr); 1724 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1725 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1726 } else { 1727 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1728 } 1729 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1730 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1732 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1733 } 1734 1735 /* Matrix of coarse basis functions (local) */ 1736 if (pcbddc->coarse_phi_B) { 1737 PetscInt on_B,on_primal,on_D=n_D; 1738 if (pcbddc->coarse_phi_D) { 1739 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1740 } 1741 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1742 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1743 PetscScalar *marray; 1744 1745 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1746 ierr = PetscFree(marray);CHKERRQ(ierr); 1747 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1748 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1749 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1750 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1751 } 1752 } 1753 1754 if (!pcbddc->coarse_phi_B) { 1755 PetscScalar *marray; 1756 1757 n = n_B*pcbddc->local_primal_size; 1758 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1759 n += n_D*pcbddc->local_primal_size; 1760 } 1761 if (!pcbddc->symmetric_primal) { 1762 n *= 2; 1763 } 1764 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1766 n = n_B*pcbddc->local_primal_size; 1767 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1769 n += n_D*pcbddc->local_primal_size; 1770 } 1771 if (!pcbddc->symmetric_primal) { 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1773 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1774 n = n_B*pcbddc->local_primal_size; 1775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1776 } 1777 } else { 1778 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1779 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1780 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1781 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1782 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1783 } 1784 } 1785 } 1786 1787 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1788 p0_lidx_I = NULL; 1789 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1790 const PetscInt *idxs; 1791 1792 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1793 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1794 for (i=0;i<pcbddc->benign_n;i++) { 1795 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1796 } 1797 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1798 } 1799 1800 /* vertices */ 1801 if (n_vertices) { 1802 1803 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1804 1805 if (n_R) { 1806 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1807 PetscBLASInt B_N,B_one = 1; 1808 PetscScalar *x,*y; 1809 PetscBool isseqaij; 1810 1811 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1812 if (need_benign_correction) { 1813 ISLocalToGlobalMapping RtoN; 1814 IS is_p0; 1815 PetscInt *idxs_p0,n; 1816 1817 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1818 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1819 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1820 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1821 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1822 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1823 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1824 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1825 } 1826 1827 if (lda_rhs == n_R) { 1828 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1829 } else { 1830 PetscScalar *av,*array; 1831 const PetscInt *xadj,*adjncy; 1832 PetscInt n; 1833 PetscBool flg_row; 1834 1835 array = work+lda_rhs*n_vertices; 1836 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1837 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1838 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1839 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1840 for (i=0;i<n;i++) { 1841 PetscInt j; 1842 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1843 } 1844 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1845 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1847 } 1848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1849 if (need_benign_correction) { 1850 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1851 PetscScalar *marr; 1852 1853 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1854 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1855 1856 | 0 0 0 | (V) 1857 L = | 0 0 -1 | (P-p0) 1858 | 0 0 -1 | (p0) 1859 1860 */ 1861 for (i=0;i<reuse_solver->benign_n;i++) { 1862 const PetscScalar *vals; 1863 const PetscInt *idxs,*idxs_zero; 1864 PetscInt n,j,nz; 1865 1866 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1867 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1868 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1869 for (j=0;j<n;j++) { 1870 PetscScalar val = vals[j]; 1871 PetscInt k,col = idxs[j]; 1872 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1873 } 1874 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1875 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1876 } 1877 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1878 } 1879 if (F) { 1880 /* need to correct the rhs */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr; 1884 1885 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1886 if (lda_rhs != n_R) { 1887 for (i=0;i<n_vertices;i++) { 1888 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1889 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1890 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1891 } 1892 } else { 1893 for (i=0;i<n_vertices;i++) { 1894 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1895 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1896 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1897 } 1898 } 1899 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1900 } 1901 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1902 /* need to correct the solution */ 1903 if (need_benign_correction) { 1904 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1905 PetscScalar *marr; 1906 1907 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1908 if (lda_rhs != n_R) { 1909 for (i=0;i<n_vertices;i++) { 1910 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1911 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1912 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1913 } 1914 } else { 1915 for (i=0;i<n_vertices;i++) { 1916 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1917 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1919 } 1920 } 1921 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1922 } 1923 } else { 1924 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1925 for (i=0;i<n_vertices;i++) { 1926 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1927 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1928 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1929 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1930 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1931 } 1932 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1933 } 1934 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1935 /* S_VV and S_CV */ 1936 if (n_constraints) { 1937 Mat B; 1938 1939 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1940 for (i=0;i<n_vertices;i++) { 1941 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1942 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1943 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1944 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1945 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1946 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1947 } 1948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1949 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1950 ierr = MatDestroy(&B);CHKERRQ(ierr); 1951 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1952 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1953 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1954 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1955 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1956 ierr = MatDestroy(&B);CHKERRQ(ierr); 1957 } 1958 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1959 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1960 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1961 } 1962 if (lda_rhs != n_R) { 1963 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1965 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1966 } 1967 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1968 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1969 if (need_benign_correction) { 1970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1971 PetscScalar *marr,*sums; 1972 1973 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1974 ierr = MatDenseGetArray(S_VVt,&marr); 1975 for (i=0;i<reuse_solver->benign_n;i++) { 1976 const PetscScalar *vals; 1977 const PetscInt *idxs,*idxs_zero; 1978 PetscInt n,j,nz; 1979 1980 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1982 for (j=0;j<n_vertices;j++) { 1983 PetscInt k; 1984 sums[j] = 0.; 1985 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1986 } 1987 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1988 for (j=0;j<n;j++) { 1989 PetscScalar val = vals[j]; 1990 PetscInt k; 1991 for (k=0;k<n_vertices;k++) { 1992 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1993 } 1994 } 1995 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1996 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree(sums);CHKERRQ(ierr); 1999 ierr = MatDenseRestoreArray(S_VVt,&marr); 2000 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2001 } 2002 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2003 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2004 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2005 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2006 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2007 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2008 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2009 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2010 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2011 } else { 2012 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2013 } 2014 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2015 2016 /* coarse basis functions */ 2017 for (i=0;i<n_vertices;i++) { 2018 PetscScalar *y; 2019 2020 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2021 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2022 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2023 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 y[n_B*i+idx_V_B[i]] = 1.0; 2026 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2027 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2028 2029 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2030 PetscInt j; 2031 2032 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2033 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2034 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2035 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2036 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2037 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2038 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2039 } 2040 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2041 } 2042 /* if n_R == 0 the object is not destroyed */ 2043 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2044 } 2045 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2046 2047 if (n_constraints) { 2048 Mat B; 2049 2050 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2051 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2052 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2053 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2054 if (n_vertices) { 2055 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2056 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2057 } else { 2058 Mat S_VCt; 2059 2060 if (lda_rhs != n_R) { 2061 ierr = MatDestroy(&B);CHKERRQ(ierr); 2062 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2063 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2064 } 2065 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2066 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2067 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2068 } 2069 } 2070 ierr = MatDestroy(&B);CHKERRQ(ierr); 2071 /* coarse basis functions */ 2072 for (i=0;i<n_constraints;i++) { 2073 PetscScalar *y; 2074 2075 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2076 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2077 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2078 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2079 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2081 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2082 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2083 PetscInt j; 2084 2085 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2086 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2087 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2088 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2090 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2091 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2092 } 2093 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2094 } 2095 } 2096 if (n_constraints) { 2097 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2098 } 2099 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2100 2101 /* coarse matrix entries relative to B_0 */ 2102 if (pcbddc->benign_n) { 2103 Mat B0_B,B0_BPHI; 2104 IS is_dummy; 2105 PetscScalar *data; 2106 PetscInt j; 2107 2108 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2109 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2110 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2111 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2112 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2113 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2114 for (j=0;j<pcbddc->benign_n;j++) { 2115 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2116 for (i=0;i<pcbddc->local_primal_size;i++) { 2117 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2118 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2119 } 2120 } 2121 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2122 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2123 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2124 } 2125 2126 /* compute other basis functions for non-symmetric problems */ 2127 if (!pcbddc->symmetric_primal) { 2128 Mat B_V=NULL,B_C=NULL; 2129 PetscScalar *marray; 2130 2131 if (n_constraints) { 2132 Mat S_CCT,C_CRT; 2133 2134 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2135 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2136 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2137 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2138 if (n_vertices) { 2139 Mat S_VCT; 2140 2141 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2142 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2143 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2144 } 2145 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2146 } 2147 if (n_vertices && n_R) { 2148 PetscScalar *av,*marray; 2149 const PetscInt *xadj,*adjncy; 2150 PetscInt n; 2151 PetscBool flg_row; 2152 2153 /* B_V = B_V - A_VR^T */ 2154 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2155 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2156 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2157 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2158 for (i=0;i<n;i++) { 2159 PetscInt j; 2160 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2161 } 2162 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2163 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2164 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2165 } 2166 2167 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2168 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2169 for (i=0;i<n_vertices;i++) { 2170 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2171 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2172 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2173 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2174 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2175 } 2176 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2177 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2178 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2179 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2180 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2181 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2182 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2183 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2184 } 2185 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2186 /* coarse basis functions */ 2187 for (i=0;i<pcbddc->local_primal_size;i++) { 2188 PetscScalar *y; 2189 2190 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2191 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2192 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2193 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 if (i<n_vertices) { 2196 y[n_B*i+idx_V_B[i]] = 1.0; 2197 } 2198 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2199 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2200 2201 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2202 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2203 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2204 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2205 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2206 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2207 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2208 } 2209 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2210 } 2211 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2212 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2213 } 2214 /* free memory */ 2215 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2216 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2218 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2219 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2220 ierr = PetscFree(work);CHKERRQ(ierr); 2221 if (n_vertices) { 2222 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2223 } 2224 if (n_constraints) { 2225 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2226 } 2227 /* Checking coarse_sub_mat and coarse basis functios */ 2228 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2229 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2230 if (pcbddc->dbg_flag) { 2231 Mat coarse_sub_mat; 2232 Mat AUXMAT,TM1,TM2,TM3,TM4; 2233 Mat coarse_phi_D,coarse_phi_B; 2234 Mat coarse_psi_D,coarse_psi_B; 2235 Mat A_II,A_BB,A_IB,A_BI; 2236 Mat C_B,CPHI; 2237 IS is_dummy; 2238 Vec mones; 2239 MatType checkmattype=MATSEQAIJ; 2240 PetscReal real_value; 2241 2242 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2243 Mat A; 2244 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2245 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2247 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2249 ierr = MatDestroy(&A);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2252 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2253 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2254 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2255 } 2256 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2257 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2258 if (!pcbddc->symmetric_primal) { 2259 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2260 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2261 } 2262 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2263 2264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2267 if (!pcbddc->symmetric_primal) { 2268 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2269 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2270 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2271 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2273 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2274 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2276 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2277 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2279 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2280 } else { 2281 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2282 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2283 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2284 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2285 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2286 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2287 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2288 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2289 } 2290 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2291 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2292 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2293 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2294 if (pcbddc->benign_n) { 2295 Mat B0_B,B0_BPHI; 2296 PetscScalar *data,*data2; 2297 PetscInt j; 2298 2299 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2300 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2301 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2302 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2303 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2304 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2305 for (j=0;j<pcbddc->benign_n;j++) { 2306 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2307 for (i=0;i<pcbddc->local_primal_size;i++) { 2308 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2309 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2310 } 2311 } 2312 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2313 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2314 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2315 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2316 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2317 } 2318 #if 0 2319 { 2320 PetscViewer viewer; 2321 char filename[256]; 2322 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2323 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2324 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2325 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2326 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2327 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2328 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2329 if (save_change) { 2330 Mat phi_B; 2331 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2332 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2333 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2334 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2335 } else { 2336 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2337 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2338 } 2339 if (pcbddc->coarse_phi_D) { 2340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2342 } 2343 if (pcbddc->coarse_psi_B) { 2344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2346 } 2347 if (pcbddc->coarse_psi_D) { 2348 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2349 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2350 } 2351 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2352 } 2353 #endif 2354 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2355 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2356 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2358 2359 /* check constraints */ 2360 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2361 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2362 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2363 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2364 } else { 2365 PetscScalar *data; 2366 Mat tmat; 2367 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2368 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2369 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2370 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2371 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2372 } 2373 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2374 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2375 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2376 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2378 if (!pcbddc->symmetric_primal) { 2379 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2380 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2381 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2382 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2384 } 2385 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2386 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2387 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2388 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2389 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2390 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2391 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2392 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2393 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2394 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2395 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2396 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2397 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2398 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2399 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2400 if (!pcbddc->symmetric_primal) { 2401 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2402 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2403 } 2404 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2405 } 2406 /* get back data */ 2407 *coarse_submat_vals_n = coarse_submat_vals; 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2413 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2414 { 2415 Mat *work_mat; 2416 IS isrow_s,iscol_s; 2417 PetscBool rsorted,csorted; 2418 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2423 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2424 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2425 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2426 2427 if (!rsorted) { 2428 const PetscInt *idxs; 2429 PetscInt *idxs_sorted,i; 2430 2431 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2432 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2433 for (i=0;i<rsize;i++) { 2434 idxs_perm_r[i] = i; 2435 } 2436 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2437 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2438 for (i=0;i<rsize;i++) { 2439 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2440 } 2441 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2442 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2443 } else { 2444 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2445 isrow_s = isrow; 2446 } 2447 2448 if (!csorted) { 2449 if (isrow == iscol) { 2450 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2451 iscol_s = isrow_s; 2452 } else { 2453 const PetscInt *idxs; 2454 PetscInt *idxs_sorted,i; 2455 2456 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2457 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2458 for (i=0;i<csize;i++) { 2459 idxs_perm_c[i] = i; 2460 } 2461 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2462 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2463 for (i=0;i<csize;i++) { 2464 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2465 } 2466 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2467 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2468 } 2469 } else { 2470 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2471 iscol_s = iscol; 2472 } 2473 2474 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2475 2476 if (!rsorted || !csorted) { 2477 Mat new_mat; 2478 IS is_perm_r,is_perm_c; 2479 2480 if (!rsorted) { 2481 PetscInt *idxs_r,i; 2482 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2483 for (i=0;i<rsize;i++) { 2484 idxs_r[idxs_perm_r[i]] = i; 2485 } 2486 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2487 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2488 } else { 2489 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2490 } 2491 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2492 2493 if (!csorted) { 2494 if (isrow_s == iscol_s) { 2495 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2496 is_perm_c = is_perm_r; 2497 } else { 2498 PetscInt *idxs_c,i; 2499 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2500 for (i=0;i<csize;i++) { 2501 idxs_c[idxs_perm_c[i]] = i; 2502 } 2503 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2504 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2505 } 2506 } else { 2507 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2508 } 2509 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2510 2511 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2512 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2513 work_mat[0] = new_mat; 2514 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2515 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2516 } 2517 2518 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2519 *B = work_mat[0]; 2520 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2521 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2522 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2528 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2529 { 2530 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2531 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2532 Mat new_mat; 2533 IS is_local,is_global; 2534 PetscInt local_size; 2535 PetscBool isseqaij; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2540 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2541 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2542 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2543 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2544 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2545 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2546 2547 /* check */ 2548 if (pcbddc->dbg_flag) { 2549 Vec x,x_change; 2550 PetscReal error; 2551 2552 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2553 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2554 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2555 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2556 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2557 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2558 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2559 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2560 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2561 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2562 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2564 ierr = VecDestroy(&x);CHKERRQ(ierr); 2565 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2566 } 2567 2568 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2569 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2570 if (isseqaij) { 2571 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2572 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2573 } else { 2574 Mat work_mat; 2575 2576 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2577 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2578 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2579 } 2580 if (matis->A->symmetric_set) { 2581 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2582 #if !defined(PETSC_USE_COMPLEX) 2583 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2584 #endif 2585 } 2586 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2592 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2593 { 2594 PC_IS* pcis = (PC_IS*)(pc->data); 2595 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2596 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2597 PetscInt *idx_R_local=NULL; 2598 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2599 PetscInt vbs,bs; 2600 PetscBT bitmask=NULL; 2601 PetscErrorCode ierr; 2602 2603 PetscFunctionBegin; 2604 /* 2605 No need to setup local scatters if 2606 - primal space is unchanged 2607 AND 2608 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2609 AND 2610 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2611 */ 2612 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2613 PetscFunctionReturn(0); 2614 } 2615 /* destroy old objects */ 2616 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2617 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2618 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2619 /* Set Non-overlapping dimensions */ 2620 n_B = pcis->n_B; 2621 n_D = pcis->n - n_B; 2622 n_vertices = pcbddc->n_vertices; 2623 2624 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2625 2626 /* create auxiliary bitmask and allocate workspace */ 2627 if (!sub_schurs->reuse_solver) { 2628 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2629 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2630 for (i=0;i<n_vertices;i++) { 2631 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2632 } 2633 2634 for (i=0, n_R=0; i<pcis->n; i++) { 2635 if (!PetscBTLookup(bitmask,i)) { 2636 idx_R_local[n_R++] = i; 2637 } 2638 } 2639 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2640 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2641 2642 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2644 } 2645 2646 /* Block code */ 2647 vbs = 1; 2648 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2649 if (bs>1 && !(n_vertices%bs)) { 2650 PetscBool is_blocked = PETSC_TRUE; 2651 PetscInt *vary; 2652 if (!sub_schurs->reuse_solver) { 2653 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2654 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2656 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2657 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2658 for (i=0; i<pcis->n/bs; i++) { 2659 if (vary[i]!=0 && vary[i]!=bs) { 2660 is_blocked = PETSC_FALSE; 2661 break; 2662 } 2663 } 2664 ierr = PetscFree(vary);CHKERRQ(ierr); 2665 } else { 2666 /* Verify directly the R set */ 2667 for (i=0; i<n_R/bs; i++) { 2668 PetscInt j,node=idx_R_local[bs*i]; 2669 for (j=1; j<bs; j++) { 2670 if (node != idx_R_local[bs*i+j]-j) { 2671 is_blocked = PETSC_FALSE; 2672 break; 2673 } 2674 } 2675 } 2676 } 2677 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2678 vbs = bs; 2679 for (i=0;i<n_R/vbs;i++) { 2680 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2681 } 2682 } 2683 } 2684 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2685 if (sub_schurs->reuse_solver) { 2686 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2687 2688 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2689 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2690 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2691 reuse_solver->is_R = pcbddc->is_R_local; 2692 } else { 2693 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2694 } 2695 2696 /* print some info if requested */ 2697 if (pcbddc->dbg_flag) { 2698 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2699 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2700 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2705 } 2706 2707 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2708 if (!sub_schurs->reuse_solver) { 2709 IS is_aux1,is_aux2; 2710 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2711 2712 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2713 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2714 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2715 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2716 for (i=0; i<n_D; i++) { 2717 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2718 } 2719 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2720 for (i=0, j=0; i<n_R; i++) { 2721 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2722 aux_array1[j++] = i; 2723 } 2724 } 2725 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2726 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2727 for (i=0, j=0; i<n_B; i++) { 2728 if (!PetscBTLookup(bitmask,is_indices[i])) { 2729 aux_array2[j++] = i; 2730 } 2731 } 2732 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2733 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2734 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2735 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2737 2738 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2739 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2740 for (i=0, j=0; i<n_R; i++) { 2741 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2742 aux_array1[j++] = i; 2743 } 2744 } 2745 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2746 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2747 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2748 } 2749 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2750 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2751 } else { 2752 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2753 IS tis; 2754 PetscInt schur_size; 2755 2756 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2757 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2758 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2759 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2760 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2761 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2762 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2764 } 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769 2770 #undef __FUNCT__ 2771 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2772 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2773 { 2774 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2775 PC_IS *pcis = (PC_IS*)pc->data; 2776 PC pc_temp; 2777 Mat A_RR; 2778 MatReuse reuse; 2779 PetscScalar m_one = -1.0; 2780 PetscReal value; 2781 PetscInt n_D,n_R; 2782 PetscBool use_exact,use_exact_reduced,issbaij; 2783 PetscErrorCode ierr; 2784 /* prefixes stuff */ 2785 char dir_prefix[256],neu_prefix[256],str_level[16]; 2786 size_t len; 2787 2788 PetscFunctionBegin; 2789 2790 /* compute prefixes */ 2791 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2792 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2793 if (!pcbddc->current_level) { 2794 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2795 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2796 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2797 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2798 } else { 2799 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2800 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2801 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2802 len -= 15; /* remove "pc_bddc_coarse_" */ 2803 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2804 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2805 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2806 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2807 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2808 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2809 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2810 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2811 } 2812 2813 /* DIRICHLET PROBLEM */ 2814 if (dirichlet) { 2815 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2816 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2817 if (!sub_schurs->reuse_solver) { 2818 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2819 } 2820 if (pcbddc->dbg_flag) { 2821 Mat A_IIn; 2822 2823 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2824 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2825 pcis->A_II = A_IIn; 2826 } 2827 } 2828 if (pcbddc->local_mat->symmetric_set) { 2829 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2830 } 2831 /* Matrix for Dirichlet problem is pcis->A_II */ 2832 n_D = pcis->n - pcis->n_B; 2833 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2834 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2835 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2836 /* default */ 2837 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2838 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2839 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2840 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2841 if (issbaij) { 2842 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2843 } else { 2844 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2845 } 2846 /* Allow user's customization */ 2847 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2848 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2849 } 2850 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2851 if (sub_schurs->reuse_solver) { 2852 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2853 2854 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2855 } 2856 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2857 if (!n_D) { 2858 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2859 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2860 } 2861 /* Set Up KSP for Dirichlet problem of BDDC */ 2862 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2863 /* set ksp_D into pcis data */ 2864 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2865 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2866 pcis->ksp_D = pcbddc->ksp_D; 2867 } 2868 2869 /* NEUMANN PROBLEM */ 2870 A_RR = 0; 2871 if (neumann) { 2872 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2873 PetscInt ibs,mbs; 2874 PetscBool issbaij; 2875 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2876 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2877 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2878 if (pcbddc->ksp_R) { /* already created ksp */ 2879 PetscInt nn_R; 2880 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2881 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2882 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2883 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2884 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2885 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2886 reuse = MAT_INITIAL_MATRIX; 2887 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2888 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2889 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2890 reuse = MAT_INITIAL_MATRIX; 2891 } else { /* safe to reuse the matrix */ 2892 reuse = MAT_REUSE_MATRIX; 2893 } 2894 } 2895 /* last check */ 2896 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2897 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2898 reuse = MAT_INITIAL_MATRIX; 2899 } 2900 } else { /* first time, so we need to create the matrix */ 2901 reuse = MAT_INITIAL_MATRIX; 2902 } 2903 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2904 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2905 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2906 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2907 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2908 if (matis->A == pcbddc->local_mat) { 2909 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2910 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2911 } else { 2912 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2913 } 2914 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2915 if (matis->A == pcbddc->local_mat) { 2916 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2917 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2918 } else { 2919 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2920 } 2921 } 2922 /* extract A_RR */ 2923 if (sub_schurs->reuse_solver) { 2924 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2925 2926 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2927 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2928 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2929 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2930 } else { 2931 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2932 } 2933 } else { 2934 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2935 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2936 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2937 } 2938 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2939 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2940 } 2941 if (pcbddc->local_mat->symmetric_set) { 2942 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2943 } 2944 if (!pcbddc->ksp_R) { /* create object if not present */ 2945 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2946 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2947 /* default */ 2948 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2949 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2950 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2951 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2952 if (issbaij) { 2953 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2954 } else { 2955 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2956 } 2957 /* Allow user's customization */ 2958 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2959 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2960 } 2961 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2962 if (!n_R) { 2963 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2964 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2965 } 2966 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2967 /* Reuse solver if it is present */ 2968 if (sub_schurs->reuse_solver) { 2969 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2970 2971 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2972 } 2973 /* Set Up KSP for Neumann problem of BDDC */ 2974 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2975 } 2976 2977 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2978 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2979 if (pcbddc->dbg_flag) { 2980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2981 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2983 } 2984 if (dirichlet) { /* Dirichlet */ 2985 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2986 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2987 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2988 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2989 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2990 /* need to be adapted? */ 2991 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2992 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2993 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2994 /* print info */ 2995 if (pcbddc->dbg_flag) { 2996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2998 } 2999 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3000 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3001 } 3002 } 3003 if (neumann) { /* Neumann */ 3004 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3005 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3006 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3007 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3008 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3009 /* need to be adapted? */ 3010 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3011 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3012 /* print info */ 3013 if (pcbddc->dbg_flag) { 3014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3015 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3016 } 3017 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3018 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3019 } 3020 } 3021 } 3022 /* free Neumann problem's matrix */ 3023 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3024 PetscFunctionReturn(0); 3025 } 3026 3027 #undef __FUNCT__ 3028 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3029 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3030 { 3031 PetscErrorCode ierr; 3032 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3033 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3034 3035 PetscFunctionBegin; 3036 if (!sub_schurs->reuse_solver) { 3037 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3038 } 3039 if (!pcbddc->switch_static) { 3040 if (applytranspose && pcbddc->local_auxmat1) { 3041 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3042 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3043 } 3044 if (!sub_schurs->reuse_solver) { 3045 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3046 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 } else { 3048 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3049 3050 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3051 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3052 } 3053 } else { 3054 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3056 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3057 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 if (applytranspose && pcbddc->local_auxmat1) { 3059 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3060 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3061 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3062 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3063 } 3064 } 3065 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3066 if (applytranspose) { 3067 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3068 } else { 3069 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3070 } 3071 } else { 3072 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3073 3074 if (applytranspose) { 3075 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3076 } else { 3077 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3078 } 3079 } 3080 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3081 if (!pcbddc->switch_static) { 3082 if (!sub_schurs->reuse_solver) { 3083 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 } else { 3086 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3087 3088 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3089 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3090 } 3091 if (!applytranspose && pcbddc->local_auxmat1) { 3092 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3093 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3094 } 3095 } else { 3096 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 if (!applytranspose && pcbddc->local_auxmat1) { 3101 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3102 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3103 } 3104 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3108 } 3109 PetscFunctionReturn(0); 3110 } 3111 3112 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3113 #undef __FUNCT__ 3114 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3115 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3116 { 3117 PetscErrorCode ierr; 3118 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3119 PC_IS* pcis = (PC_IS*) (pc->data); 3120 const PetscScalar zero = 0.0; 3121 3122 PetscFunctionBegin; 3123 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3124 if (!pcbddc->benign_apply_coarse_only) { 3125 if (applytranspose) { 3126 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3127 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3128 } else { 3129 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3130 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3131 } 3132 } else { 3133 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3134 } 3135 3136 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3137 if (pcbddc->benign_n) { 3138 PetscScalar *array; 3139 PetscInt j; 3140 3141 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3142 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3143 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3144 } 3145 3146 /* start communications from local primal nodes to rhs of coarse solver */ 3147 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3148 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3149 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3150 3151 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3152 /* TODO remove null space when doing multilevel */ 3153 if (pcbddc->coarse_ksp) { 3154 Mat coarse_mat; 3155 Vec rhs,sol; 3156 MatNullSpace nullsp; 3157 3158 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3159 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3160 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3161 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3162 if (nullsp) { 3163 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3164 } 3165 if (applytranspose) { 3166 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3167 } else { 3168 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3169 } 3170 if (nullsp) { 3171 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3172 } 3173 } 3174 3175 /* Local solution on R nodes */ 3176 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3177 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3178 } 3179 /* communications from coarse sol to local primal nodes */ 3180 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3181 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 3183 /* Sum contributions from the two levels */ 3184 if (!pcbddc->benign_apply_coarse_only) { 3185 if (applytranspose) { 3186 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3187 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3188 } else { 3189 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3190 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3191 } 3192 /* store p0 */ 3193 if (pcbddc->benign_n) { 3194 PetscScalar *array; 3195 PetscInt j; 3196 3197 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3198 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3199 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3200 } 3201 } else { /* expand the coarse solution */ 3202 if (applytranspose) { 3203 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3204 } else { 3205 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3206 } 3207 } 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3213 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3214 { 3215 PetscErrorCode ierr; 3216 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3217 PetscScalar *array; 3218 Vec from,to; 3219 3220 PetscFunctionBegin; 3221 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3222 from = pcbddc->coarse_vec; 3223 to = pcbddc->vec1_P; 3224 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3225 Vec tvec; 3226 3227 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3228 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3229 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3230 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3231 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3232 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3233 } 3234 } else { /* from local to global -> put data in coarse right hand side */ 3235 from = pcbddc->vec1_P; 3236 to = pcbddc->coarse_vec; 3237 } 3238 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3244 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3245 { 3246 PetscErrorCode ierr; 3247 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3248 PetscScalar *array; 3249 Vec from,to; 3250 3251 PetscFunctionBegin; 3252 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3253 from = pcbddc->coarse_vec; 3254 to = pcbddc->vec1_P; 3255 } else { /* from local to global -> put data in coarse right hand side */ 3256 from = pcbddc->vec1_P; 3257 to = pcbddc->coarse_vec; 3258 } 3259 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3260 if (smode == SCATTER_FORWARD) { 3261 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3262 Vec tvec; 3263 3264 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3265 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3266 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3267 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3268 } 3269 } else { 3270 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3271 ierr = VecResetArray(from);CHKERRQ(ierr); 3272 } 3273 } 3274 PetscFunctionReturn(0); 3275 } 3276 3277 /* uncomment for testing purposes */ 3278 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3281 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3282 { 3283 PetscErrorCode ierr; 3284 PC_IS* pcis = (PC_IS*)(pc->data); 3285 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3286 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3287 /* one and zero */ 3288 PetscScalar one=1.0,zero=0.0; 3289 /* space to store constraints and their local indices */ 3290 PetscScalar *constraints_data; 3291 PetscInt *constraints_idxs,*constraints_idxs_B; 3292 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3293 PetscInt *constraints_n; 3294 /* iterators */ 3295 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3296 /* BLAS integers */ 3297 PetscBLASInt lwork,lierr; 3298 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3299 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3300 /* reuse */ 3301 PetscInt olocal_primal_size,olocal_primal_size_cc; 3302 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3303 /* change of basis */ 3304 PetscBool qr_needed; 3305 PetscBT change_basis,qr_needed_idx; 3306 /* auxiliary stuff */ 3307 PetscInt *nnz,*is_indices; 3308 PetscInt ncc; 3309 /* some quantities */ 3310 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3311 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3312 3313 PetscFunctionBegin; 3314 /* Destroy Mat objects computed previously */ 3315 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3316 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3317 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3318 /* save info on constraints from previous setup (if any) */ 3319 olocal_primal_size = pcbddc->local_primal_size; 3320 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3321 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3322 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3323 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3324 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3325 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3326 3327 /* print some info */ 3328 if (pcbddc->dbg_flag) { 3329 IS vertices; 3330 PetscInt nv,nedges,nfaces; 3331 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3332 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3333 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3334 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3340 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3341 } 3342 3343 if (!pcbddc->adaptive_selection) { 3344 IS ISForVertices,*ISForFaces,*ISForEdges; 3345 MatNullSpace nearnullsp; 3346 const Vec *nearnullvecs; 3347 Vec *localnearnullsp; 3348 PetscScalar *array; 3349 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3350 PetscBool nnsp_has_cnst; 3351 /* LAPACK working arrays for SVD or POD */ 3352 PetscBool skip_lapack,boolforchange; 3353 PetscScalar *work; 3354 PetscReal *singular_vals; 3355 #if defined(PETSC_USE_COMPLEX) 3356 PetscReal *rwork; 3357 #endif 3358 #if defined(PETSC_MISSING_LAPACK_GESVD) 3359 PetscScalar *temp_basis,*correlation_mat; 3360 #else 3361 PetscBLASInt dummy_int=1; 3362 PetscScalar dummy_scalar=1.; 3363 #endif 3364 3365 /* Get index sets for faces, edges and vertices from graph */ 3366 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3367 /* free unneeded index sets */ 3368 if (!pcbddc->use_vertices) { 3369 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3370 } 3371 if (!pcbddc->use_edges) { 3372 for (i=0;i<n_ISForEdges;i++) { 3373 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3376 n_ISForEdges = 0; 3377 } 3378 if (!pcbddc->use_faces) { 3379 for (i=0;i<n_ISForFaces;i++) { 3380 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3381 } 3382 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3383 n_ISForFaces = 0; 3384 } 3385 3386 #if defined(PETSC_USE_DEBUG) 3387 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3388 Also use_change_of_basis should be consistent among processors */ 3389 if (pcbddc->NullSpace) { 3390 PetscBool tbool[2],gbool[2]; 3391 3392 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3393 pcbddc->use_change_of_basis = PETSC_TRUE; 3394 if (!ISForEdges) { 3395 pcbddc->use_change_on_faces = PETSC_TRUE; 3396 } 3397 } 3398 tbool[0] = pcbddc->use_change_of_basis; 3399 tbool[1] = pcbddc->use_change_on_faces; 3400 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3401 pcbddc->use_change_of_basis = gbool[0]; 3402 pcbddc->use_change_on_faces = gbool[1]; 3403 } 3404 #endif 3405 3406 /* check if near null space is attached to global mat */ 3407 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3408 if (nearnullsp) { 3409 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3410 /* remove any stored info */ 3411 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3412 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3413 /* store information for BDDC solver reuse */ 3414 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3415 pcbddc->onearnullspace = nearnullsp; 3416 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3417 for (i=0;i<nnsp_size;i++) { 3418 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3419 } 3420 } else { /* if near null space is not provided BDDC uses constants by default */ 3421 nnsp_size = 0; 3422 nnsp_has_cnst = PETSC_TRUE; 3423 } 3424 /* get max number of constraints on a single cc */ 3425 max_constraints = nnsp_size; 3426 if (nnsp_has_cnst) max_constraints++; 3427 3428 /* 3429 Evaluate maximum storage size needed by the procedure 3430 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3431 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3432 There can be multiple constraints per connected component 3433 */ 3434 n_vertices = 0; 3435 if (ISForVertices) { 3436 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3437 } 3438 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3439 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3440 3441 total_counts = n_ISForFaces+n_ISForEdges; 3442 total_counts *= max_constraints; 3443 total_counts += n_vertices; 3444 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3445 3446 total_counts = 0; 3447 max_size_of_constraint = 0; 3448 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3449 IS used_is; 3450 if (i<n_ISForEdges) { 3451 used_is = ISForEdges[i]; 3452 } else { 3453 used_is = ISForFaces[i-n_ISForEdges]; 3454 } 3455 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3456 total_counts += j; 3457 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3458 } 3459 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3460 3461 /* get local part of global near null space vectors */ 3462 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3463 for (k=0;k<nnsp_size;k++) { 3464 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3465 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 } 3468 3469 /* whether or not to skip lapack calls */ 3470 skip_lapack = PETSC_TRUE; 3471 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3472 3473 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3474 if (!skip_lapack) { 3475 PetscScalar temp_work; 3476 3477 #if defined(PETSC_MISSING_LAPACK_GESVD) 3478 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3479 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3482 #if defined(PETSC_USE_COMPLEX) 3483 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3484 #endif 3485 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3486 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3487 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3488 lwork = -1; 3489 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3490 #if !defined(PETSC_USE_COMPLEX) 3491 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3492 #else 3493 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3494 #endif 3495 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3496 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3497 #else /* on missing GESVD */ 3498 /* SVD */ 3499 PetscInt max_n,min_n; 3500 max_n = max_size_of_constraint; 3501 min_n = max_constraints; 3502 if (max_size_of_constraint < max_constraints) { 3503 min_n = max_size_of_constraint; 3504 max_n = max_constraints; 3505 } 3506 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3507 #if defined(PETSC_USE_COMPLEX) 3508 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3509 #endif 3510 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3511 lwork = -1; 3512 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3513 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3516 #if !defined(PETSC_USE_COMPLEX) 3517 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3518 #else 3519 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3520 #endif 3521 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3522 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3523 #endif /* on missing GESVD */ 3524 /* Allocate optimal workspace */ 3525 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3526 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3527 } 3528 /* Now we can loop on constraining sets */ 3529 total_counts = 0; 3530 constraints_idxs_ptr[0] = 0; 3531 constraints_data_ptr[0] = 0; 3532 /* vertices */ 3533 if (n_vertices) { 3534 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3536 for (i=0;i<n_vertices;i++) { 3537 constraints_n[total_counts] = 1; 3538 constraints_data[total_counts] = 1.0; 3539 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3540 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3541 total_counts++; 3542 } 3543 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3544 n_vertices = total_counts; 3545 } 3546 3547 /* edges and faces */ 3548 total_counts_cc = total_counts; 3549 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3550 IS used_is; 3551 PetscBool idxs_copied = PETSC_FALSE; 3552 3553 if (ncc<n_ISForEdges) { 3554 used_is = ISForEdges[ncc]; 3555 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3556 } else { 3557 used_is = ISForFaces[ncc-n_ISForEdges]; 3558 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3559 } 3560 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3561 3562 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3563 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3564 /* change of basis should not be performed on local periodic nodes */ 3565 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3566 if (nnsp_has_cnst) { 3567 PetscScalar quad_value; 3568 3569 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3570 idxs_copied = PETSC_TRUE; 3571 3572 if (!pcbddc->use_nnsp_true) { 3573 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3574 } else { 3575 quad_value = 1.0; 3576 } 3577 for (j=0;j<size_of_constraint;j++) { 3578 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3579 } 3580 temp_constraints++; 3581 total_counts++; 3582 } 3583 for (k=0;k<nnsp_size;k++) { 3584 PetscReal real_value; 3585 PetscScalar *ptr_to_data; 3586 3587 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3588 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3589 for (j=0;j<size_of_constraint;j++) { 3590 ptr_to_data[j] = array[is_indices[j]]; 3591 } 3592 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3593 /* check if array is null on the connected component */ 3594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3595 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3596 if (real_value > 0.0) { /* keep indices and values */ 3597 temp_constraints++; 3598 total_counts++; 3599 if (!idxs_copied) { 3600 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3601 idxs_copied = PETSC_TRUE; 3602 } 3603 } 3604 } 3605 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3606 valid_constraints = temp_constraints; 3607 if (!pcbddc->use_nnsp_true && temp_constraints) { 3608 if (temp_constraints == 1) { /* just normalize the constraint */ 3609 PetscScalar norm,*ptr_to_data; 3610 3611 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3613 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3614 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3615 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3616 } else { /* perform SVD */ 3617 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3618 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3619 3620 #if defined(PETSC_MISSING_LAPACK_GESVD) 3621 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3622 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3623 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3624 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3625 from that computed using LAPACKgesvd 3626 -> This is due to a different computation of eigenvectors in LAPACKheev 3627 -> The quality of the POD-computed basis will be the same */ 3628 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3629 /* Store upper triangular part of correlation matrix */ 3630 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3631 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3632 for (j=0;j<temp_constraints;j++) { 3633 for (k=0;k<j+1;k++) { 3634 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3635 } 3636 } 3637 /* compute eigenvalues and eigenvectors of correlation matrix */ 3638 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3640 #if !defined(PETSC_USE_COMPLEX) 3641 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3642 #else 3643 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3644 #endif 3645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3646 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3647 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3648 j = 0; 3649 while (j < temp_constraints && singular_vals[j] < tol) j++; 3650 total_counts = total_counts-j; 3651 valid_constraints = temp_constraints-j; 3652 /* scale and copy POD basis into used quadrature memory */ 3653 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3654 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3655 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3659 if (j<temp_constraints) { 3660 PetscInt ii; 3661 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3663 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3664 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3665 for (k=0;k<temp_constraints-j;k++) { 3666 for (ii=0;ii<size_of_constraint;ii++) { 3667 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3668 } 3669 } 3670 } 3671 #else /* on missing GESVD */ 3672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3673 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3675 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3676 #if !defined(PETSC_USE_COMPLEX) 3677 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3678 #else 3679 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3680 #endif 3681 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3682 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3683 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3684 k = temp_constraints; 3685 if (k > size_of_constraint) k = size_of_constraint; 3686 j = 0; 3687 while (j < k && singular_vals[k-j-1] < tol) j++; 3688 valid_constraints = k-j; 3689 total_counts = total_counts-temp_constraints+valid_constraints; 3690 #endif /* on missing GESVD */ 3691 } 3692 } 3693 /* update pointers information */ 3694 if (valid_constraints) { 3695 constraints_n[total_counts_cc] = valid_constraints; 3696 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3697 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3698 /* set change_of_basis flag */ 3699 if (boolforchange) { 3700 PetscBTSet(change_basis,total_counts_cc); 3701 } 3702 total_counts_cc++; 3703 } 3704 } 3705 /* free workspace */ 3706 if (!skip_lapack) { 3707 ierr = PetscFree(work);CHKERRQ(ierr); 3708 #if defined(PETSC_USE_COMPLEX) 3709 ierr = PetscFree(rwork);CHKERRQ(ierr); 3710 #endif 3711 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3712 #if defined(PETSC_MISSING_LAPACK_GESVD) 3713 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3714 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3715 #endif 3716 } 3717 for (k=0;k<nnsp_size;k++) { 3718 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3721 /* free index sets of faces, edges and vertices */ 3722 for (i=0;i<n_ISForFaces;i++) { 3723 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3724 } 3725 if (n_ISForFaces) { 3726 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3727 } 3728 for (i=0;i<n_ISForEdges;i++) { 3729 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3730 } 3731 if (n_ISForEdges) { 3732 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3733 } 3734 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3735 } else { 3736 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3737 3738 total_counts = 0; 3739 n_vertices = 0; 3740 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3741 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3742 } 3743 max_constraints = 0; 3744 total_counts_cc = 0; 3745 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3746 total_counts += pcbddc->adaptive_constraints_n[i]; 3747 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3748 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3749 } 3750 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3751 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3752 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3753 constraints_data = pcbddc->adaptive_constraints_data; 3754 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3755 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3756 total_counts_cc = 0; 3757 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3758 if (pcbddc->adaptive_constraints_n[i]) { 3759 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3760 } 3761 } 3762 #if 0 3763 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3764 for (i=0;i<total_counts_cc;i++) { 3765 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3766 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3767 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3768 printf(" %d",constraints_idxs[j]); 3769 } 3770 printf("\n"); 3771 printf("number of cc: %d\n",constraints_n[i]); 3772 } 3773 for (i=0;i<n_vertices;i++) { 3774 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3775 } 3776 for (i=0;i<sub_schurs->n_subs;i++) { 3777 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3778 } 3779 #endif 3780 3781 max_size_of_constraint = 0; 3782 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3783 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3784 /* Change of basis */ 3785 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3786 if (pcbddc->use_change_of_basis) { 3787 for (i=0;i<sub_schurs->n_subs;i++) { 3788 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3789 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3790 } 3791 } 3792 } 3793 } 3794 pcbddc->local_primal_size = total_counts; 3795 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3796 3797 /* map constraints_idxs in boundary numbering */ 3798 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3799 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3800 3801 /* Create constraint matrix */ 3802 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3803 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3804 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3805 3806 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3807 /* determine if a QR strategy is needed for change of basis */ 3808 qr_needed = PETSC_FALSE; 3809 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3810 total_primal_vertices=0; 3811 pcbddc->local_primal_size_cc = 0; 3812 for (i=0;i<total_counts_cc;i++) { 3813 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3814 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3815 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3816 pcbddc->local_primal_size_cc += 1; 3817 } else if (PetscBTLookup(change_basis,i)) { 3818 for (k=0;k<constraints_n[i];k++) { 3819 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3820 } 3821 pcbddc->local_primal_size_cc += constraints_n[i]; 3822 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3823 PetscBTSet(qr_needed_idx,i); 3824 qr_needed = PETSC_TRUE; 3825 } 3826 } else { 3827 pcbddc->local_primal_size_cc += 1; 3828 } 3829 } 3830 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3831 pcbddc->n_vertices = total_primal_vertices; 3832 /* permute indices in order to have a sorted set of vertices */ 3833 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3834 3835 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3836 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3837 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3838 3839 /* nonzero structure of constraint matrix */ 3840 /* and get reference dof for local constraints */ 3841 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3842 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3843 3844 j = total_primal_vertices; 3845 total_counts = total_primal_vertices; 3846 cum = total_primal_vertices; 3847 for (i=n_vertices;i<total_counts_cc;i++) { 3848 if (!PetscBTLookup(change_basis,i)) { 3849 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3850 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3851 cum++; 3852 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3853 for (k=0;k<constraints_n[i];k++) { 3854 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3855 nnz[j+k] = size_of_constraint; 3856 } 3857 j += constraints_n[i]; 3858 } 3859 } 3860 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3861 ierr = PetscFree(nnz);CHKERRQ(ierr); 3862 3863 /* set values in constraint matrix */ 3864 for (i=0;i<total_primal_vertices;i++) { 3865 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3866 } 3867 total_counts = total_primal_vertices; 3868 for (i=n_vertices;i<total_counts_cc;i++) { 3869 if (!PetscBTLookup(change_basis,i)) { 3870 PetscInt *cols; 3871 3872 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3873 cols = constraints_idxs+constraints_idxs_ptr[i]; 3874 for (k=0;k<constraints_n[i];k++) { 3875 PetscInt row = total_counts+k; 3876 PetscScalar *vals; 3877 3878 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3879 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3880 } 3881 total_counts += constraints_n[i]; 3882 } 3883 } 3884 /* assembling */ 3885 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 3888 /* 3889 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3890 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3891 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3892 */ 3893 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3894 if (pcbddc->use_change_of_basis) { 3895 /* dual and primal dofs on a single cc */ 3896 PetscInt dual_dofs,primal_dofs; 3897 /* working stuff for GEQRF */ 3898 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3899 PetscBLASInt lqr_work; 3900 /* working stuff for UNGQR */ 3901 PetscScalar *gqr_work,lgqr_work_t; 3902 PetscBLASInt lgqr_work; 3903 /* working stuff for TRTRS */ 3904 PetscScalar *trs_rhs; 3905 PetscBLASInt Blas_NRHS; 3906 /* pointers for values insertion into change of basis matrix */ 3907 PetscInt *start_rows,*start_cols; 3908 PetscScalar *start_vals; 3909 /* working stuff for values insertion */ 3910 PetscBT is_primal; 3911 PetscInt *aux_primal_numbering_B; 3912 /* matrix sizes */ 3913 PetscInt global_size,local_size; 3914 /* temporary change of basis */ 3915 Mat localChangeOfBasisMatrix; 3916 /* extra space for debugging */ 3917 PetscScalar *dbg_work; 3918 3919 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3920 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3921 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3922 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3923 /* nonzeros for local mat */ 3924 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3925 if (!pcbddc->benign_change || pcbddc->fake_change) { 3926 for (i=0;i<pcis->n;i++) nnz[i]=1; 3927 } else { 3928 const PetscInt *ii; 3929 PetscInt n; 3930 PetscBool flg_row; 3931 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3932 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3933 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3934 } 3935 for (i=n_vertices;i<total_counts_cc;i++) { 3936 if (PetscBTLookup(change_basis,i)) { 3937 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3938 if (PetscBTLookup(qr_needed_idx,i)) { 3939 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3940 } else { 3941 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3942 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3943 } 3944 } 3945 } 3946 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3947 ierr = PetscFree(nnz);CHKERRQ(ierr); 3948 /* Set interior change in the matrix */ 3949 if (!pcbddc->benign_change || pcbddc->fake_change) { 3950 for (i=0;i<pcis->n;i++) { 3951 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3952 } 3953 } else { 3954 const PetscInt *ii,*jj; 3955 PetscScalar *aa; 3956 PetscInt n; 3957 PetscBool flg_row; 3958 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3959 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3960 for (i=0;i<n;i++) { 3961 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3962 } 3963 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3964 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3965 } 3966 3967 if (pcbddc->dbg_flag) { 3968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3970 } 3971 3972 3973 /* Now we loop on the constraints which need a change of basis */ 3974 /* 3975 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3976 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3977 3978 Basic blocks of change of basis matrix T computed by 3979 3980 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3981 3982 | 1 0 ... 0 s_1/S | 3983 | 0 1 ... 0 s_2/S | 3984 | ... | 3985 | 0 ... 1 s_{n-1}/S | 3986 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3987 3988 with S = \sum_{i=1}^n s_i^2 3989 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3990 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3991 3992 - QR decomposition of constraints otherwise 3993 */ 3994 if (qr_needed) { 3995 /* space to store Q */ 3996 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3997 /* first we issue queries for optimal work */ 3998 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3999 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4000 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4001 lqr_work = -1; 4002 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4003 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4004 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4005 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4006 lgqr_work = -1; 4007 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4008 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4009 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4011 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4012 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4013 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4014 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4015 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4016 /* array to store scaling factors for reflectors */ 4017 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4018 /* array to store rhs and solution of triangular solver */ 4019 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4020 /* allocating workspace for check */ 4021 if (pcbddc->dbg_flag) { 4022 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4023 } 4024 } 4025 /* array to store whether a node is primal or not */ 4026 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4027 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4028 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4029 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4030 for (i=0;i<total_primal_vertices;i++) { 4031 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4032 } 4033 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4034 4035 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4036 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4037 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4038 if (PetscBTLookup(change_basis,total_counts)) { 4039 /* get constraint info */ 4040 primal_dofs = constraints_n[total_counts]; 4041 dual_dofs = size_of_constraint-primal_dofs; 4042 4043 if (pcbddc->dbg_flag) { 4044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4045 } 4046 4047 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4048 4049 /* copy quadrature constraints for change of basis check */ 4050 if (pcbddc->dbg_flag) { 4051 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4052 } 4053 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4054 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4055 4056 /* compute QR decomposition of constraints */ 4057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4061 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4062 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4064 4065 /* explictly compute R^-T */ 4066 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4067 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4068 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4069 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4071 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4072 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4073 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4074 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4075 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4076 4077 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4082 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4083 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4084 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4086 4087 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4088 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4089 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4091 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4092 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4097 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4099 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4100 4101 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4102 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4103 /* insert cols for primal dofs */ 4104 for (j=0;j<primal_dofs;j++) { 4105 start_vals = &qr_basis[j*size_of_constraint]; 4106 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4107 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4108 } 4109 /* insert cols for dual dofs */ 4110 for (j=0,k=0;j<dual_dofs;k++) { 4111 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4112 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4113 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4114 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4115 j++; 4116 } 4117 } 4118 4119 /* check change of basis */ 4120 if (pcbddc->dbg_flag) { 4121 PetscInt ii,jj; 4122 PetscBool valid_qr=PETSC_TRUE; 4123 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4124 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4128 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4130 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4131 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4132 for (jj=0;jj<size_of_constraint;jj++) { 4133 for (ii=0;ii<primal_dofs;ii++) { 4134 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4135 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4136 } 4137 } 4138 if (!valid_qr) { 4139 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4140 for (jj=0;jj<size_of_constraint;jj++) { 4141 for (ii=0;ii<primal_dofs;ii++) { 4142 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4143 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4144 } 4145 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4146 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4147 } 4148 } 4149 } 4150 } else { 4151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4152 } 4153 } 4154 } else { /* simple transformation block */ 4155 PetscInt row,col; 4156 PetscScalar val,norm; 4157 4158 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4159 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4160 for (j=0;j<size_of_constraint;j++) { 4161 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4162 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4163 if (!PetscBTLookup(is_primal,row_B)) { 4164 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4165 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4166 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4167 } else { 4168 for (k=0;k<size_of_constraint;k++) { 4169 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4170 if (row != col) { 4171 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4172 } else { 4173 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4174 } 4175 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4176 } 4177 } 4178 } 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4181 } 4182 } 4183 } else { 4184 if (pcbddc->dbg_flag) { 4185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4186 } 4187 } 4188 } 4189 4190 /* free workspace */ 4191 if (qr_needed) { 4192 if (pcbddc->dbg_flag) { 4193 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4194 } 4195 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4196 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4197 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4198 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4199 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4200 } 4201 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4202 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4203 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 4205 /* assembling of global change of variable */ 4206 if (!pcbddc->fake_change) { 4207 Mat tmat; 4208 PetscInt bs; 4209 4210 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4211 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4212 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4213 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4214 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4215 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4216 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4217 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4218 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4219 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4220 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4221 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4222 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4223 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4224 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4225 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4226 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4227 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4228 4229 /* check */ 4230 if (pcbddc->dbg_flag) { 4231 PetscReal error; 4232 Vec x,x_change; 4233 4234 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4235 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4236 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4237 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4238 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4239 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4241 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4242 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4243 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4244 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4245 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4246 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4248 ierr = VecDestroy(&x);CHKERRQ(ierr); 4249 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4250 } 4251 /* adapt sub_schurs computed (if any) */ 4252 if (pcbddc->use_deluxe_scaling) { 4253 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4254 4255 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4256 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4257 } 4258 if (sub_schurs->S_Ej_all) { 4259 Mat S_new,tmat; 4260 IS is_all_N,is_V_Sall = NULL; 4261 4262 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4263 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4264 if (pcbddc->deluxe_zerorows) { 4265 ISLocalToGlobalMapping NtoSall; 4266 IS is_V; 4267 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4268 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4269 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4270 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4271 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4272 } 4273 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4274 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4275 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4276 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4277 if (pcbddc->deluxe_zerorows) { 4278 const PetscScalar *array; 4279 const PetscInt *idxs_V,*idxs_all; 4280 PetscInt i,n_V; 4281 4282 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4283 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4284 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4285 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4286 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4287 for (i=0;i<n_V;i++) { 4288 PetscScalar val; 4289 PetscInt idx; 4290 4291 idx = idxs_V[i]; 4292 val = array[idxs_all[idxs_V[i]]]; 4293 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4294 } 4295 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4296 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4297 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4298 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4299 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4300 } 4301 sub_schurs->S_Ej_all = S_new; 4302 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4303 if (sub_schurs->sum_S_Ej_all) { 4304 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4305 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4306 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4307 if (pcbddc->deluxe_zerorows) { 4308 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4309 } 4310 sub_schurs->sum_S_Ej_all = S_new; 4311 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4312 } 4313 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4314 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4315 } 4316 /* destroy any change of basis context in sub_schurs */ 4317 if (sub_schurs->change) { 4318 PetscInt i; 4319 4320 for (i=0;i<sub_schurs->n_subs;i++) { 4321 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4322 } 4323 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4324 } 4325 } 4326 if (pcbddc->switch_static) { /* need to save the local change */ 4327 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4328 } else { 4329 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4330 } 4331 /* determine if any process has changed the pressures locally */ 4332 if (pcbddc->benign_saddle_point) { 4333 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4334 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4335 } 4336 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4337 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4338 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4339 pcbddc->use_qr_single = qr_needed; 4340 } 4341 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4342 PetscBool needglobal = PETSC_FALSE; 4343 if (pcbddc->benign_saddle_point) { 4344 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4345 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4346 } 4347 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4348 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4349 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4350 } else { 4351 Mat benign_global = NULL; 4352 if (needglobal) { 4353 Mat tmat; 4354 4355 pcbddc->change_interior = PETSC_TRUE; 4356 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4357 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4360 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4361 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4362 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4363 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4364 if (pcbddc->benign_change) { 4365 Mat M; 4366 4367 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4368 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4369 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4370 ierr = MatDestroy(&M);CHKERRQ(ierr); 4371 } else { 4372 Mat eye; 4373 PetscScalar *array; 4374 4375 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4376 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4377 for (i=0;i<pcis->n;i++) { 4378 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4379 } 4380 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4381 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4384 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4385 } 4386 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4387 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4388 } 4389 if (pcbddc->user_ChangeOfBasisMatrix) { 4390 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4391 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4392 } else if (needglobal) { 4393 pcbddc->ChangeOfBasisMatrix = benign_global; 4394 } 4395 } 4396 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4397 IS is_global; 4398 const PetscInt *gidxs; 4399 4400 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4401 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4402 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4403 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4404 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4405 } 4406 } 4407 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4408 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4409 } 4410 4411 if (!pcbddc->fake_change) { 4412 /* add pressure dofs to set of primal nodes for numbering purposes */ 4413 for (i=0;i<pcbddc->benign_n;i++) { 4414 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4415 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4416 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4417 pcbddc->local_primal_size_cc++; 4418 pcbddc->local_primal_size++; 4419 } 4420 4421 /* check if a new primal space has been introduced (also take into account benign trick) */ 4422 pcbddc->new_primal_space_local = PETSC_TRUE; 4423 if (olocal_primal_size == pcbddc->local_primal_size) { 4424 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4425 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4426 if (!pcbddc->new_primal_space_local) { 4427 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4428 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4429 } 4430 } 4431 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4432 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4433 } 4434 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4435 4436 /* flush dbg viewer */ 4437 if (pcbddc->dbg_flag) { 4438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4439 } 4440 4441 /* free workspace */ 4442 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4443 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4444 if (!pcbddc->adaptive_selection) { 4445 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4446 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4447 } else { 4448 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4449 pcbddc->adaptive_constraints_idxs_ptr, 4450 pcbddc->adaptive_constraints_data_ptr, 4451 pcbddc->adaptive_constraints_idxs, 4452 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4453 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4454 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4455 } 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4461 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4462 { 4463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4464 PC_IS *pcis = (PC_IS*)pc->data; 4465 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4466 PetscInt ierr,i,N; 4467 PetscViewer viewer=pcbddc->dbg_viewer; 4468 4469 PetscFunctionBegin; 4470 /* Reset previously computed graph */ 4471 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4472 /* Init local Graph struct */ 4473 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4474 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4475 4476 /* Check validity of the csr graph passed in by the user */ 4477 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4478 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4479 } 4480 4481 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4482 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4483 PetscInt *xadj,*adjncy; 4484 PetscInt nvtxs; 4485 PetscBool flg_row=PETSC_FALSE; 4486 4487 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4488 if (flg_row) { 4489 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4490 pcbddc->computed_rowadj = PETSC_TRUE; 4491 } 4492 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4493 } 4494 if (pcbddc->dbg_flag) { 4495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4496 } 4497 4498 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4499 if (pcbddc->user_provided_isfordofs) { 4500 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4501 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4502 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4503 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4504 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4505 } 4506 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4507 pcbddc->n_ISForDofs = 0; 4508 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4509 } 4510 } else { 4511 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4512 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4513 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4514 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4515 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4516 } 4517 } 4518 } 4519 4520 /* Setup of Graph */ 4521 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4522 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4523 } 4524 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4525 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4526 } 4527 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4528 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4529 } 4530 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4531 4532 /* attach info on disconnected subdomains if present */ 4533 if (pcbddc->n_local_subs) { 4534 PetscInt *local_subs; 4535 4536 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4537 for (i=0;i<pcbddc->n_local_subs;i++) { 4538 const PetscInt *idxs; 4539 PetscInt nl,j; 4540 4541 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4542 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4543 for (j=0;j<nl;j++) { 4544 local_subs[idxs[j]] = i; 4545 } 4546 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4547 } 4548 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4549 pcbddc->mat_graph->local_subs = local_subs; 4550 } 4551 4552 /* Graph's connected components analysis */ 4553 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4554 4555 /* print some info to stdout */ 4556 if (pcbddc->dbg_flag) { 4557 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4558 } 4559 PetscFunctionReturn(0); 4560 } 4561 4562 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4563 #undef __FUNCT__ 4564 #define __FUNCT__ "PCBDDCSubsetNumbering" 4565 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4566 { 4567 PetscSF sf; 4568 PetscLayout map; 4569 const PetscInt *idxs; 4570 PetscInt *leaf_data,*root_data,*gidxs; 4571 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4572 PetscInt n_n,nlocals,start,first_index; 4573 PetscMPIInt commsize; 4574 PetscBool first_found; 4575 PetscErrorCode ierr; 4576 4577 PetscFunctionBegin; 4578 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4579 if (subset_mult) { 4580 PetscCheckSameComm(subset,1,subset_mult,2); 4581 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4582 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4583 } 4584 /* create workspace layout for computing global indices of subset */ 4585 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4586 lbounds[0] = lbounds[1] = 0; 4587 for (i=0;i<n;i++) { 4588 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4589 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4590 } 4591 lbounds[0] = -lbounds[0]; 4592 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4593 gbounds[0] = -gbounds[0]; 4594 N = gbounds[1] - gbounds[0] + 1; 4595 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4599 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4600 4601 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4602 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4603 if (subset_mult) { 4604 const PetscInt* idxs_mult; 4605 4606 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4607 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4608 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4609 } else { 4610 for (i=0;i<n;i++) leaf_data[i] = 1; 4611 } 4612 /* local size of new subset */ 4613 n_n = 0; 4614 for (i=0;i<n;i++) n_n += leaf_data[i]; 4615 4616 /* global indexes in layout */ 4617 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4618 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4619 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4620 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4621 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4622 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4623 4624 /* reduce from leaves to roots */ 4625 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4626 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4627 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4628 4629 /* count indexes in local part of layout */ 4630 nlocals = 0; 4631 first_index = -1; 4632 first_found = PETSC_FALSE; 4633 for (i=0;i<Nl;i++) { 4634 if (!first_found && root_data[i]) { 4635 first_found = PETSC_TRUE; 4636 first_index = i; 4637 } 4638 nlocals += root_data[i]; 4639 } 4640 4641 /* cumulative of number of indexes and size of subset without holes */ 4642 #if defined(PETSC_HAVE_MPI_EXSCAN) 4643 start = 0; 4644 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4645 #else 4646 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4647 start = start-nlocals; 4648 #endif 4649 4650 if (N_n) { /* compute total size of new subset if requested */ 4651 *N_n = start + nlocals; 4652 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4653 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4654 } 4655 4656 /* adapt root data with cumulative */ 4657 if (first_found) { 4658 PetscInt old_index; 4659 4660 root_data[first_index] += start; 4661 old_index = first_index; 4662 for (i=first_index+1;i<Nl;i++) { 4663 if (root_data[i]) { 4664 root_data[i] += root_data[old_index]; 4665 old_index = i; 4666 } 4667 } 4668 } 4669 4670 /* from roots to leaves */ 4671 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4672 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4674 4675 /* create new IS with global indexes without holes */ 4676 if (subset_mult) { 4677 const PetscInt* idxs_mult; 4678 PetscInt cum; 4679 4680 cum = 0; 4681 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4682 for (i=0;i<n;i++) { 4683 PetscInt j; 4684 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4685 } 4686 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4687 } else { 4688 for (i=0;i<n;i++) { 4689 gidxs[i] = leaf_data[i]-1; 4690 } 4691 } 4692 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4693 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4694 PetscFunctionReturn(0); 4695 } 4696 4697 #undef __FUNCT__ 4698 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4699 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4700 { 4701 PetscInt i,j; 4702 PetscScalar *alphas; 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 /* this implements stabilized Gram-Schmidt */ 4707 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4708 for (i=0;i<n;i++) { 4709 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4710 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4711 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4712 } 4713 ierr = PetscFree(alphas);CHKERRQ(ierr); 4714 PetscFunctionReturn(0); 4715 } 4716 4717 #undef __FUNCT__ 4718 #define __FUNCT__ "MatISGetSubassemblingPattern" 4719 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4720 { 4721 Mat A; 4722 PetscInt n_neighs,*neighs,*n_shared,**shared; 4723 PetscMPIInt size,rank,color; 4724 PetscInt *xadj,*adjncy; 4725 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4726 PetscInt im_active,active_procs,n,i,local_size,threshold=0; 4727 PetscInt void_procs,*procs_candidates = NULL; 4728 PetscBool ismatis,use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4729 PetscSubcomm subcomm; 4730 PetscErrorCode ierr; 4731 4732 PetscFunctionBegin; 4733 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4734 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4735 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4736 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4737 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4738 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4739 4740 if (have_void) *have_void = PETSC_FALSE; 4741 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4742 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4743 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4744 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4745 im_active = !!(n); 4746 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4747 void_procs = size - active_procs; 4748 /* get ranks of of non-active processes in mat communicator */ 4749 if (void_procs) { 4750 PetscInt ncand; 4751 4752 if (have_void) *have_void = PETSC_TRUE; 4753 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4754 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4755 for (i=0,ncand=0;i<size;i++) { 4756 if (!procs_candidates[i]) { 4757 procs_candidates[ncand++] = i; 4758 } 4759 } 4760 /* force n_subdomains to be not greater that the number of non-active processes */ 4761 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4762 } 4763 4764 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4765 if (active_procs < *n_subdomains) { 4766 PetscInt issize,isidx; 4767 if (im_active) { 4768 issize = 1; 4769 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4770 isidx = procs_candidates[rank]; 4771 } else { 4772 isidx = rank; 4773 } 4774 } else { 4775 issize = 0; 4776 isidx = -1; 4777 } 4778 *n_subdomains = active_procs; 4779 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4780 PetscFunctionReturn(0); 4781 } 4782 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4783 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4784 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4785 4786 /* Get info on mapping */ 4787 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4788 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4789 4790 /* build local CSR graph of subdomains' connectivity */ 4791 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4792 xadj[0] = 0; 4793 xadj[1] = PetscMax(n_neighs-1,0); 4794 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4795 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4796 4797 if (threshold) { 4798 PetscInt xadj_count = 0; 4799 for (i=1;i<n_neighs;i++) { 4800 if (n_shared[i] > threshold) { 4801 adjncy[xadj_count] = neighs[i]; 4802 adjncy_wgt[xadj_count] = n_shared[i]; 4803 xadj_count++; 4804 } 4805 } 4806 xadj[1] = xadj_count; 4807 } else { 4808 if (xadj[1]) { 4809 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4810 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4811 } 4812 } 4813 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4814 if (use_square) { 4815 for (i=0;i<xadj[1];i++) { 4816 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4817 } 4818 } 4819 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4820 4821 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4822 4823 /* 4824 Restrict work on active processes only. 4825 */ 4826 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4827 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4828 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4829 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4830 if (color) { 4831 ierr = PetscFree(xadj);CHKERRQ(ierr); 4832 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4833 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4834 } else { 4835 Mat subdomain_adj; 4836 IS new_ranks,new_ranks_contig; 4837 MatPartitioning partitioner; 4838 PetscInt prank,rstart=0,rend=0; 4839 PetscInt *is_indices,*oldranks; 4840 PetscMPIInt size; 4841 PetscBool aggregate; 4842 4843 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4844 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4845 prank = rank; 4846 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4847 /* 4848 for (i=0;i<size;i++) { 4849 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4850 } 4851 */ 4852 for (i=0;i<xadj[1];i++) { 4853 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4854 } 4855 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4856 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4857 if (aggregate) { 4858 PetscInt lrows,row,ncols,*cols; 4859 PetscMPIInt nrank; 4860 PetscScalar *vals; 4861 4862 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4863 lrows = 0; 4864 if (nrank<redprocs) { 4865 lrows = size/redprocs; 4866 if (nrank<size%redprocs) lrows++; 4867 } 4868 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4869 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4870 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4871 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4872 row = nrank; 4873 ncols = xadj[1]-xadj[0]; 4874 cols = adjncy; 4875 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4876 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4877 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4878 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4879 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4880 ierr = PetscFree(xadj);CHKERRQ(ierr); 4881 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4882 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4883 ierr = PetscFree(vals);CHKERRQ(ierr); 4884 } else { 4885 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4886 } 4887 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4888 4889 /* Partition */ 4890 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4891 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4892 if (use_vwgt) { 4893 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4894 v_wgt[0] = local_size; 4895 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4896 } 4897 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4898 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4899 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4900 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4901 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4902 4903 /* renumber new_ranks to avoid "holes" in new set of processors */ 4904 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4905 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4906 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4907 if (!aggregate) { 4908 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4909 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4910 } else { 4911 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4912 } 4913 } else { 4914 PetscInt idxs[1]; 4915 PetscMPIInt tag; 4916 MPI_Request *reqs; 4917 4918 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4919 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4920 for (i=rstart;i<rend;i++) { 4921 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4922 } 4923 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4924 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4925 ierr = PetscFree(reqs);CHKERRQ(ierr); 4926 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4927 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4928 } else { 4929 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4930 } 4931 } 4932 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4933 /* clean up */ 4934 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4935 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4936 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4937 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4938 } 4939 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4940 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4941 4942 /* assemble parallel IS for sends */ 4943 i = 1; 4944 if (color) i=0; 4945 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 4946 PetscFunctionReturn(0); 4947 } 4948 4949 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4950 4951 #undef __FUNCT__ 4952 #define __FUNCT__ "MatISSubassemble" 4953 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4954 { 4955 Mat local_mat; 4956 IS is_sends_internal; 4957 PetscInt rows,cols,new_local_rows; 4958 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4959 PetscBool ismatis,isdense,newisdense,destroy_mat; 4960 ISLocalToGlobalMapping l2gmap; 4961 PetscInt* l2gmap_indices; 4962 const PetscInt* is_indices; 4963 MatType new_local_type; 4964 /* buffers */ 4965 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4966 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4967 PetscInt *recv_buffer_idxs_local; 4968 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4969 /* MPI */ 4970 MPI_Comm comm,comm_n; 4971 PetscSubcomm subcomm; 4972 PetscMPIInt n_sends,n_recvs,commsize; 4973 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4974 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4975 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4976 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4977 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4978 PetscErrorCode ierr; 4979 4980 PetscFunctionBegin; 4981 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4982 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4983 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4984 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4985 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4986 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 4987 PetscValidLogicalCollectiveBool(mat,reuse,6); 4988 PetscValidLogicalCollectiveInt(mat,nis,8); 4989 4990 /* further checks */ 4991 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4992 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4993 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4994 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4995 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4996 if (reuse && *mat_n) { 4997 PetscInt mrows,mcols,mnrows,mncols; 4998 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 4999 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5000 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5001 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5002 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5003 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5004 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5005 } 5006 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5007 PetscValidLogicalCollectiveInt(mat,bs,0); 5008 5009 /* prepare IS for sending if not provided */ 5010 if (!is_sends) { 5011 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5012 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5013 } else { 5014 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5015 is_sends_internal = is_sends; 5016 } 5017 5018 /* get comm */ 5019 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5020 5021 /* compute number of sends */ 5022 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5023 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5024 5025 /* compute number of receives */ 5026 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5027 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5028 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5029 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5030 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5031 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5032 ierr = PetscFree(iflags);CHKERRQ(ierr); 5033 5034 /* restrict comm if requested */ 5035 subcomm = 0; 5036 destroy_mat = PETSC_FALSE; 5037 if (restrict_comm) { 5038 PetscMPIInt color,subcommsize; 5039 5040 color = 0; 5041 if (restrict_full) { 5042 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5043 } else { 5044 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5045 } 5046 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5047 subcommsize = commsize - subcommsize; 5048 /* check if reuse has been requested */ 5049 if (reuse) { 5050 if (*mat_n) { 5051 PetscMPIInt subcommsize2; 5052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5053 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5054 comm_n = PetscObjectComm((PetscObject)*mat_n); 5055 } else { 5056 comm_n = PETSC_COMM_SELF; 5057 } 5058 } else { /* MAT_INITIAL_MATRIX */ 5059 PetscMPIInt rank; 5060 5061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5062 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5063 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5064 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5065 comm_n = PetscSubcommChild(subcomm); 5066 } 5067 /* flag to destroy *mat_n if not significative */ 5068 if (color) destroy_mat = PETSC_TRUE; 5069 } else { 5070 comm_n = comm; 5071 } 5072 5073 /* prepare send/receive buffers */ 5074 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5075 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5076 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5077 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5078 if (nis) { 5079 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5080 } 5081 5082 /* Get data from local matrices */ 5083 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5084 /* TODO: See below some guidelines on how to prepare the local buffers */ 5085 /* 5086 send_buffer_vals should contain the raw values of the local matrix 5087 send_buffer_idxs should contain: 5088 - MatType_PRIVATE type 5089 - PetscInt size_of_l2gmap 5090 - PetscInt global_row_indices[size_of_l2gmap] 5091 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5092 */ 5093 else { 5094 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5095 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5096 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5097 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5098 send_buffer_idxs[1] = i; 5099 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5100 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5101 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5102 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5103 for (i=0;i<n_sends;i++) { 5104 ilengths_vals[is_indices[i]] = len*len; 5105 ilengths_idxs[is_indices[i]] = len+2; 5106 } 5107 } 5108 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5109 /* additional is (if any) */ 5110 if (nis) { 5111 PetscMPIInt psum; 5112 PetscInt j; 5113 for (j=0,psum=0;j<nis;j++) { 5114 PetscInt plen; 5115 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5116 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5117 psum += len+1; /* indices + lenght */ 5118 } 5119 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5120 for (j=0,psum=0;j<nis;j++) { 5121 PetscInt plen; 5122 const PetscInt *is_array_idxs; 5123 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5124 send_buffer_idxs_is[psum] = plen; 5125 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5126 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5127 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5128 psum += plen+1; /* indices + lenght */ 5129 } 5130 for (i=0;i<n_sends;i++) { 5131 ilengths_idxs_is[is_indices[i]] = psum; 5132 } 5133 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5134 } 5135 5136 buf_size_idxs = 0; 5137 buf_size_vals = 0; 5138 buf_size_idxs_is = 0; 5139 for (i=0;i<n_recvs;i++) { 5140 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5141 buf_size_vals += (PetscInt)olengths_vals[i]; 5142 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5143 } 5144 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5145 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5146 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5147 5148 /* get new tags for clean communications */ 5149 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5150 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5151 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5152 5153 /* allocate for requests */ 5154 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5155 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5156 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5157 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5158 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5159 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5160 5161 /* communications */ 5162 ptr_idxs = recv_buffer_idxs; 5163 ptr_vals = recv_buffer_vals; 5164 ptr_idxs_is = recv_buffer_idxs_is; 5165 for (i=0;i<n_recvs;i++) { 5166 source_dest = onodes[i]; 5167 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5168 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5169 ptr_idxs += olengths_idxs[i]; 5170 ptr_vals += olengths_vals[i]; 5171 if (nis) { 5172 source_dest = onodes_is[i]; 5173 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5174 ptr_idxs_is += olengths_idxs_is[i]; 5175 } 5176 } 5177 for (i=0;i<n_sends;i++) { 5178 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5179 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5180 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5181 if (nis) { 5182 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5183 } 5184 } 5185 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5186 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5187 5188 /* assemble new l2g map */ 5189 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5190 ptr_idxs = recv_buffer_idxs; 5191 new_local_rows = 0; 5192 for (i=0;i<n_recvs;i++) { 5193 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5194 ptr_idxs += olengths_idxs[i]; 5195 } 5196 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5197 ptr_idxs = recv_buffer_idxs; 5198 new_local_rows = 0; 5199 for (i=0;i<n_recvs;i++) { 5200 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5201 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5202 ptr_idxs += olengths_idxs[i]; 5203 } 5204 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5205 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5206 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5207 5208 /* infer new local matrix type from received local matrices type */ 5209 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5210 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5211 if (n_recvs) { 5212 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5213 ptr_idxs = recv_buffer_idxs; 5214 for (i=0;i<n_recvs;i++) { 5215 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5216 new_local_type_private = MATAIJ_PRIVATE; 5217 break; 5218 } 5219 ptr_idxs += olengths_idxs[i]; 5220 } 5221 switch (new_local_type_private) { 5222 case MATDENSE_PRIVATE: 5223 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5224 new_local_type = MATSEQAIJ; 5225 bs = 1; 5226 } else { /* if I receive only 1 dense matrix */ 5227 new_local_type = MATSEQDENSE; 5228 bs = 1; 5229 } 5230 break; 5231 case MATAIJ_PRIVATE: 5232 new_local_type = MATSEQAIJ; 5233 bs = 1; 5234 break; 5235 case MATBAIJ_PRIVATE: 5236 new_local_type = MATSEQBAIJ; 5237 break; 5238 case MATSBAIJ_PRIVATE: 5239 new_local_type = MATSEQSBAIJ; 5240 break; 5241 default: 5242 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5243 break; 5244 } 5245 } else { /* by default, new_local_type is seqdense */ 5246 new_local_type = MATSEQDENSE; 5247 bs = 1; 5248 } 5249 5250 /* create MATIS object if needed */ 5251 if (!reuse) { 5252 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5253 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5254 } else { 5255 /* it also destroys the local matrices */ 5256 if (*mat_n) { 5257 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5258 } else { /* this is a fake object */ 5259 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5260 } 5261 } 5262 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5263 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5264 5265 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5266 5267 /* Global to local map of received indices */ 5268 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5269 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5270 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5271 5272 /* restore attributes -> type of incoming data and its size */ 5273 buf_size_idxs = 0; 5274 for (i=0;i<n_recvs;i++) { 5275 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5276 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5277 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5278 } 5279 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5280 5281 /* set preallocation */ 5282 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5283 if (!newisdense) { 5284 PetscInt *new_local_nnz=0; 5285 5286 ptr_vals = recv_buffer_vals; 5287 ptr_idxs = recv_buffer_idxs_local; 5288 if (n_recvs) { 5289 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5290 } 5291 for (i=0;i<n_recvs;i++) { 5292 PetscInt j; 5293 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5294 for (j=0;j<*(ptr_idxs+1);j++) { 5295 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5296 } 5297 } else { 5298 /* TODO */ 5299 } 5300 ptr_idxs += olengths_idxs[i]; 5301 } 5302 if (new_local_nnz) { 5303 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5304 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5305 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5306 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5307 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5308 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5309 } else { 5310 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5311 } 5312 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5313 } else { 5314 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5315 } 5316 5317 /* set values */ 5318 ptr_vals = recv_buffer_vals; 5319 ptr_idxs = recv_buffer_idxs_local; 5320 for (i=0;i<n_recvs;i++) { 5321 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5322 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5323 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5324 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5325 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5326 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5327 } else { 5328 /* TODO */ 5329 } 5330 ptr_idxs += olengths_idxs[i]; 5331 ptr_vals += olengths_vals[i]; 5332 } 5333 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5334 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5335 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5336 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5337 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5338 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5339 5340 #if 0 5341 if (!restrict_comm) { /* check */ 5342 Vec lvec,rvec; 5343 PetscReal infty_error; 5344 5345 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5346 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5347 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5348 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5349 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5350 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5351 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5352 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5353 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5354 } 5355 #endif 5356 5357 /* assemble new additional is (if any) */ 5358 if (nis) { 5359 PetscInt **temp_idxs,*count_is,j,psum; 5360 5361 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5362 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5363 ptr_idxs = recv_buffer_idxs_is; 5364 psum = 0; 5365 for (i=0;i<n_recvs;i++) { 5366 for (j=0;j<nis;j++) { 5367 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5368 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5369 psum += plen; 5370 ptr_idxs += plen+1; /* shift pointer to received data */ 5371 } 5372 } 5373 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5374 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5375 for (i=1;i<nis;i++) { 5376 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5377 } 5378 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5379 ptr_idxs = recv_buffer_idxs_is; 5380 for (i=0;i<n_recvs;i++) { 5381 for (j=0;j<nis;j++) { 5382 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5383 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5384 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5385 ptr_idxs += plen+1; /* shift pointer to received data */ 5386 } 5387 } 5388 for (i=0;i<nis;i++) { 5389 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5390 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5391 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5392 } 5393 ierr = PetscFree(count_is);CHKERRQ(ierr); 5394 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5395 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5396 } 5397 /* free workspace */ 5398 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5399 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5400 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5401 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5402 if (isdense) { 5403 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5404 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5405 } else { 5406 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5407 } 5408 if (nis) { 5409 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5410 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5411 } 5412 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5413 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5414 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5415 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5416 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5417 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5418 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5419 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5420 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5421 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5422 ierr = PetscFree(onodes);CHKERRQ(ierr); 5423 if (nis) { 5424 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5425 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5426 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5427 } 5428 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5429 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5430 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5431 for (i=0;i<nis;i++) { 5432 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5433 } 5434 *mat_n = NULL; 5435 } 5436 PetscFunctionReturn(0); 5437 } 5438 5439 /* temporary hack into ksp private data structure */ 5440 #include <petsc/private/kspimpl.h> 5441 5442 #undef __FUNCT__ 5443 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5444 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5445 { 5446 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5447 PC_IS *pcis = (PC_IS*)pc->data; 5448 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5449 MatNullSpace CoarseNullSpace=NULL; 5450 ISLocalToGlobalMapping coarse_islg; 5451 IS coarse_is,*isarray; 5452 PetscInt i,im_active=-1,active_procs=-1; 5453 PetscInt nis,nisdofs,nisneu,nisvert; 5454 PC pc_temp; 5455 PCType coarse_pc_type; 5456 KSPType coarse_ksp_type; 5457 PetscBool multilevel_requested,multilevel_allowed; 5458 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5459 Mat t_coarse_mat_is; 5460 PetscInt ncoarse; 5461 PetscBool compute_vecs = PETSC_FALSE; 5462 PetscScalar *array; 5463 MatReuse coarse_mat_reuse; 5464 PetscBool restr, full_restr, have_void; 5465 PetscErrorCode ierr; 5466 5467 PetscFunctionBegin; 5468 /* Assign global numbering to coarse dofs */ 5469 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5470 PetscInt ocoarse_size; 5471 compute_vecs = PETSC_TRUE; 5472 ocoarse_size = pcbddc->coarse_size; 5473 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5474 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5475 /* see if we can avoid some work */ 5476 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5477 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5478 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5479 PC pc; 5480 PetscBool isbddc; 5481 5482 /* temporary workaround since PCBDDC does not have a reset method so far */ 5483 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5484 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5485 if (isbddc) { 5486 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5487 } else { 5488 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5489 } 5490 coarse_reuse = PETSC_FALSE; 5491 } else { /* we can safely reuse already computed coarse matrix */ 5492 coarse_reuse = PETSC_TRUE; 5493 } 5494 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5495 coarse_reuse = PETSC_FALSE; 5496 } 5497 /* reset any subassembling information */ 5498 if (!coarse_reuse || pcbddc->recompute_topography) { 5499 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5500 } 5501 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5502 coarse_reuse = PETSC_TRUE; 5503 } 5504 /* assemble coarse matrix */ 5505 if (coarse_reuse && pcbddc->coarse_ksp) { 5506 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5507 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5508 coarse_mat_reuse = MAT_REUSE_MATRIX; 5509 } else { 5510 coarse_mat = NULL; 5511 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5512 } 5513 5514 /* creates temporary l2gmap and IS for coarse indexes */ 5515 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5516 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5517 5518 /* creates temporary MATIS object for coarse matrix */ 5519 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5520 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5521 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5522 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5523 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5524 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5525 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5527 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5528 5529 /* count "active" (i.e. with positive local size) and "void" processes */ 5530 im_active = !!(pcis->n); 5531 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5532 5533 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5534 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5535 /* full_restr : just use the receivers from the subassembling pattern */ 5536 coarse_mat_is = NULL; 5537 multilevel_allowed = PETSC_FALSE; 5538 multilevel_requested = PETSC_FALSE; 5539 full_restr = PETSC_TRUE; 5540 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5541 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5542 if (multilevel_requested) { 5543 ncoarse = active_procs/pcbddc->coarsening_ratio; 5544 restr = PETSC_FALSE; 5545 full_restr = PETSC_FALSE; 5546 } else { 5547 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5548 restr = PETSC_TRUE; 5549 full_restr = PETSC_TRUE; 5550 } 5551 ncoarse = PetscMax(1,ncoarse); 5552 if (!pcbddc->coarse_subassembling) { 5553 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5554 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5555 PetscInt psum; 5556 PetscMPIInt size; 5557 if (pcbddc->coarse_ksp) psum = 1; 5558 else psum = 0; 5559 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5560 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5561 if (ncoarse < size) have_void = PETSC_TRUE; 5562 } 5563 /* determine if we can go multilevel */ 5564 if (multilevel_requested) { 5565 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5566 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5567 } 5568 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5569 5570 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5571 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5572 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5573 const PetscInt *idxs; 5574 ISLocalToGlobalMapping tmap; 5575 5576 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5577 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5578 /* allocate space for temporary storage */ 5579 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5580 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5581 /* allocate for IS array */ 5582 nisdofs = pcbddc->n_ISForDofsLocal; 5583 nisneu = !!pcbddc->NeumannBoundariesLocal; 5584 nisvert = 0; 5585 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5586 nisvert = 1; 5587 } 5588 nis = nisdofs + nisneu + nisvert; 5589 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5590 /* dofs splitting */ 5591 for (i=0;i<nisdofs;i++) { 5592 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5593 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5594 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5595 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5596 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5597 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5598 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5599 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5600 } 5601 /* neumann boundaries */ 5602 if (pcbddc->NeumannBoundariesLocal) { 5603 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5604 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5605 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5606 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5607 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5608 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5609 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5610 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5611 } 5612 /* primal vertices (benign) */ 5613 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5614 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5615 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5616 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5617 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5618 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5619 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5620 /* ierr = ISView(isarray[nis-1],0);CHKERRQ(ierr); */ 5621 } 5622 /* free memory */ 5623 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5624 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5625 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5626 } else { 5627 nis = 0; 5628 nisdofs = 0; 5629 nisneu = 0; 5630 nisvert = 0; 5631 isarray = NULL; 5632 } 5633 /* destroy no longer needed map */ 5634 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5635 5636 /* subassemble */ 5637 if (multilevel_allowed) { 5638 PetscBool reuse,reuser; 5639 if (coarse_mat) reuse = PETSC_TRUE; 5640 else reuse = PETSC_FALSE; 5641 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5642 if (reuser) { 5643 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5644 } else { 5645 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5646 } 5647 } else { 5648 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5649 } 5650 if (coarse_mat_is || coarse_mat) { 5651 PetscMPIInt size; 5652 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5653 if (!multilevel_allowed) { 5654 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5655 } else { 5656 Mat A; 5657 5658 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5659 if (coarse_mat_is) { 5660 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5661 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5662 coarse_mat = coarse_mat_is; 5663 } 5664 /* be sure we don't have MatSeqDENSE as local mat */ 5665 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5666 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5667 } 5668 } 5669 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5670 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5671 5672 /* create local to global scatters for coarse problem */ 5673 if (compute_vecs) { 5674 PetscInt lrows; 5675 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5676 if (coarse_mat) { 5677 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5678 } else { 5679 lrows = 0; 5680 } 5681 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5682 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5683 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5684 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5685 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5686 } 5687 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5688 5689 /* set defaults for coarse KSP and PC */ 5690 if (multilevel_allowed) { 5691 coarse_ksp_type = KSPRICHARDSON; 5692 coarse_pc_type = PCBDDC; 5693 } else { 5694 coarse_ksp_type = KSPPREONLY; 5695 coarse_pc_type = PCREDUNDANT; 5696 } 5697 5698 /* print some info if requested */ 5699 if (pcbddc->dbg_flag) { 5700 if (!multilevel_allowed) { 5701 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5702 if (multilevel_requested) { 5703 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5704 } else if (pcbddc->max_levels) { 5705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5706 } 5707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5708 } 5709 } 5710 5711 /* create the coarse KSP object only once with defaults */ 5712 if (coarse_mat) { 5713 PetscViewer dbg_viewer = NULL; 5714 if (pcbddc->dbg_flag) { 5715 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5716 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5717 } 5718 if (!pcbddc->coarse_ksp) { 5719 char prefix[256],str_level[16]; 5720 size_t len; 5721 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5722 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5723 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5724 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5725 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5726 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5727 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5728 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5729 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5730 /* prefix */ 5731 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5732 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5733 if (!pcbddc->current_level) { 5734 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5735 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5736 } else { 5737 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5738 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5739 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5740 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5741 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5742 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5743 } 5744 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5745 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5746 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5747 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5748 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5749 /* allow user customization */ 5750 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5751 } 5752 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5753 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5754 if (nisdofs) { 5755 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5756 for (i=0;i<nisdofs;i++) { 5757 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5758 } 5759 } 5760 if (nisneu) { 5761 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5762 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5763 } 5764 if (nisvert) { 5765 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5766 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5767 } 5768 5769 /* get some info after set from options */ 5770 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5771 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5772 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5773 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5774 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5775 isbddc = PETSC_FALSE; 5776 } 5777 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5778 if (isredundant) { 5779 KSP inner_ksp; 5780 PC inner_pc; 5781 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5782 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5783 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5784 } 5785 5786 /* parameters which miss an API */ 5787 if (isbddc) { 5788 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5789 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5790 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5791 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5792 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5793 if (pcbddc_coarse->benign_compute_nonetflux) { 5794 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5795 } 5796 } 5797 5798 /* propagate symmetry info of coarse matrix */ 5799 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5800 if (pc->pmat->symmetric_set) { 5801 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5802 } 5803 if (pc->pmat->hermitian_set) { 5804 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5805 } 5806 if (pc->pmat->spd_set) { 5807 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5808 } 5809 /* set operators */ 5810 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5811 if (pcbddc->dbg_flag) { 5812 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5813 } 5814 } 5815 ierr = PetscFree(isarray);CHKERRQ(ierr); 5816 #if 0 5817 { 5818 PetscViewer viewer; 5819 char filename[256]; 5820 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5821 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5822 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5823 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5824 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5825 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5826 } 5827 #endif 5828 5829 /* Compute coarse null space (special handling by BDDC only) */ 5830 #if 0 5831 if (pcbddc->NullSpace) { 5832 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5833 } 5834 #endif 5835 /* hack */ 5836 if (pcbddc->coarse_ksp) { 5837 Vec crhs,csol; 5838 5839 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5840 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5841 if (!csol) { 5842 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5843 } 5844 if (!crhs) { 5845 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5846 } 5847 } 5848 5849 /* compute null space for coarse solver if the benign trick has been requested */ 5850 if (pcbddc->benign_null) { 5851 5852 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5853 for (i=0;i<pcbddc->benign_n;i++) { 5854 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5855 } 5856 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5857 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5858 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5859 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5860 if (coarse_mat) { 5861 Vec nullv; 5862 PetscScalar *array,*array2; 5863 PetscInt nl; 5864 5865 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5866 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5867 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5868 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5869 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5870 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5871 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5872 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5873 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5874 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5875 } 5876 } 5877 5878 if (pcbddc->coarse_ksp) { 5879 PetscBool ispreonly; 5880 5881 if (CoarseNullSpace) { 5882 PetscBool isnull; 5883 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5884 if (0) { 5885 if (isbddc && !pcbddc->benign_saddle_point) { 5886 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5887 } else { 5888 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5889 } 5890 } else { 5891 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5892 } 5893 } 5894 /* setup coarse ksp */ 5895 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5896 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5897 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5898 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5899 KSP check_ksp; 5900 KSPType check_ksp_type; 5901 PC check_pc; 5902 Vec check_vec,coarse_vec; 5903 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5904 PetscInt its; 5905 PetscBool compute_eigs; 5906 PetscReal *eigs_r,*eigs_c; 5907 PetscInt neigs; 5908 const char *prefix; 5909 5910 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5911 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5912 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5913 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5914 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5915 if (ispreonly) { 5916 check_ksp_type = KSPPREONLY; 5917 compute_eigs = PETSC_FALSE; 5918 } else { 5919 check_ksp_type = KSPGMRES; 5920 compute_eigs = PETSC_TRUE; 5921 } 5922 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5923 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5924 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5925 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5926 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5927 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5928 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5929 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5930 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5931 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5932 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5933 /* create random vec */ 5934 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5935 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5936 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5937 if (CoarseNullSpace) { 5938 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5939 } 5940 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5941 /* solve coarse problem */ 5942 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5943 if (CoarseNullSpace) { 5944 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5945 } 5946 /* set eigenvalue estimation if preonly has not been requested */ 5947 if (compute_eigs) { 5948 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5949 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5950 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5951 lambda_max = eigs_r[neigs-1]; 5952 lambda_min = eigs_r[0]; 5953 if (pcbddc->use_coarse_estimates) { 5954 if (lambda_max>lambda_min) { 5955 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5956 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5957 } 5958 } 5959 } 5960 5961 /* check coarse problem residual error */ 5962 if (pcbddc->dbg_flag) { 5963 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5964 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5965 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5966 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5967 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5968 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5969 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5970 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5971 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5972 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5973 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5974 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5975 if (CoarseNullSpace) { 5976 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5977 } 5978 if (compute_eigs) { 5979 PetscReal lambda_max_s,lambda_min_s; 5980 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5981 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5982 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5983 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5984 for (i=0;i<neigs;i++) { 5985 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5986 } 5987 } 5988 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5989 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5990 } 5991 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5992 if (compute_eigs) { 5993 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5994 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5995 } 5996 } 5997 } 5998 /* print additional info */ 5999 if (pcbddc->dbg_flag) { 6000 /* waits until all processes reaches this point */ 6001 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6002 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6003 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6004 } 6005 6006 /* free memory */ 6007 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6008 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6009 PetscFunctionReturn(0); 6010 } 6011 6012 #undef __FUNCT__ 6013 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6014 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6015 { 6016 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6017 PC_IS* pcis = (PC_IS*)pc->data; 6018 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6019 IS subset,subset_mult,subset_n; 6020 PetscInt local_size,coarse_size=0; 6021 PetscInt *local_primal_indices=NULL; 6022 const PetscInt *t_local_primal_indices; 6023 PetscErrorCode ierr; 6024 6025 PetscFunctionBegin; 6026 /* Compute global number of coarse dofs */ 6027 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6028 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6029 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6030 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6031 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6032 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6033 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6034 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6035 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6036 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6037 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6038 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6039 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6040 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6041 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6042 6043 /* check numbering */ 6044 if (pcbddc->dbg_flag) { 6045 PetscScalar coarsesum,*array,*array2; 6046 PetscInt i; 6047 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6048 6049 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6050 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6052 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6053 /* counter */ 6054 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6055 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6056 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6057 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6058 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6059 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6060 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6061 for (i=0;i<pcbddc->local_primal_size;i++) { 6062 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6063 } 6064 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6065 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6066 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6067 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6068 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6071 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6072 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6073 for (i=0;i<pcis->n;i++) { 6074 if (array[i] != 0.0 && array[i] != array2[i]) { 6075 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6076 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6077 set_error = PETSC_TRUE; 6078 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6079 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6080 } 6081 } 6082 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6083 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6085 for (i=0;i<pcis->n;i++) { 6086 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6087 } 6088 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6089 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6090 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6091 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6092 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6094 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6095 PetscInt *gidxs; 6096 6097 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6098 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6102 for (i=0;i<pcbddc->local_primal_size;i++) { 6103 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6104 } 6105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6106 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6107 } 6108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6109 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6110 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6111 } 6112 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6113 /* get back data */ 6114 *coarse_size_n = coarse_size; 6115 *local_primal_indices_n = local_primal_indices; 6116 PetscFunctionReturn(0); 6117 } 6118 6119 #undef __FUNCT__ 6120 #define __FUNCT__ "PCBDDCGlobalToLocal" 6121 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6122 { 6123 IS localis_t; 6124 PetscInt i,lsize,*idxs,n; 6125 PetscScalar *vals; 6126 PetscErrorCode ierr; 6127 6128 PetscFunctionBegin; 6129 /* get indices in local ordering exploiting local to global map */ 6130 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6131 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6132 for (i=0;i<lsize;i++) vals[i] = 1.0; 6133 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6134 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6135 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6136 if (idxs) { /* multilevel guard */ 6137 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6138 } 6139 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6140 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6141 ierr = PetscFree(vals);CHKERRQ(ierr); 6142 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6143 /* now compute set in local ordering */ 6144 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6145 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6146 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6147 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6148 for (i=0,lsize=0;i<n;i++) { 6149 if (PetscRealPart(vals[i]) > 0.5) { 6150 lsize++; 6151 } 6152 } 6153 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6154 for (i=0,lsize=0;i<n;i++) { 6155 if (PetscRealPart(vals[i]) > 0.5) { 6156 idxs[lsize++] = i; 6157 } 6158 } 6159 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6160 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6161 *localis = localis_t; 6162 PetscFunctionReturn(0); 6163 } 6164 6165 #undef __FUNCT__ 6166 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6167 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6168 { 6169 PC_IS *pcis=(PC_IS*)pc->data; 6170 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6171 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6172 Mat S_j; 6173 PetscInt *used_xadj,*used_adjncy; 6174 PetscBool free_used_adj; 6175 PetscErrorCode ierr; 6176 6177 PetscFunctionBegin; 6178 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6179 free_used_adj = PETSC_FALSE; 6180 if (pcbddc->sub_schurs_layers == -1) { 6181 used_xadj = NULL; 6182 used_adjncy = NULL; 6183 } else { 6184 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6185 used_xadj = pcbddc->mat_graph->xadj; 6186 used_adjncy = pcbddc->mat_graph->adjncy; 6187 } else if (pcbddc->computed_rowadj) { 6188 used_xadj = pcbddc->mat_graph->xadj; 6189 used_adjncy = pcbddc->mat_graph->adjncy; 6190 } else { 6191 PetscBool flg_row=PETSC_FALSE; 6192 const PetscInt *xadj,*adjncy; 6193 PetscInt nvtxs; 6194 6195 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6196 if (flg_row) { 6197 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6198 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6199 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6200 free_used_adj = PETSC_TRUE; 6201 } else { 6202 pcbddc->sub_schurs_layers = -1; 6203 used_xadj = NULL; 6204 used_adjncy = NULL; 6205 } 6206 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6207 } 6208 } 6209 6210 /* setup sub_schurs data */ 6211 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6212 if (!sub_schurs->schur_explicit) { 6213 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6214 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6215 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6216 } else { 6217 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6218 PetscBool isseqaij,need_change = PETSC_FALSE;; 6219 PetscInt benign_n; 6220 Mat change = NULL; 6221 Vec scaling = NULL; 6222 IS change_primal = NULL; 6223 6224 if (!pcbddc->use_vertices && reuse_solvers) { 6225 PetscInt n_vertices; 6226 6227 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6228 reuse_solvers = (PetscBool)!n_vertices; 6229 } 6230 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6231 if (!isseqaij) { 6232 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6233 if (matis->A == pcbddc->local_mat) { 6234 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6235 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6236 } else { 6237 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6238 } 6239 } 6240 if (!pcbddc->benign_change_explicit) { 6241 benign_n = pcbddc->benign_n; 6242 } else { 6243 benign_n = 0; 6244 } 6245 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6246 We need a global reduction to avoid possible deadlocks. 6247 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6248 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6249 PetscBool have_loc_change = !!(sub_schurs->change); 6250 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6251 need_change = !need_change; 6252 } 6253 /* If the user defines additional constraints, we import them here. 6254 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6255 if (need_change) { 6256 PC_IS *pcisf; 6257 PC_BDDC *pcbddcf; 6258 PC pcf; 6259 6260 if (pcbddc->sub_schurs_rebuild) { 6261 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6262 } 6263 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6264 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6265 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6266 /* hacks */ 6267 pcisf = (PC_IS*)pcf->data; 6268 pcisf->is_B_local = pcis->is_B_local; 6269 pcisf->vec1_N = pcis->vec1_N; 6270 pcisf->BtoNmap = pcis->BtoNmap; 6271 pcisf->n = pcis->n; 6272 pcisf->n_B = pcis->n_B; 6273 pcbddcf = (PC_BDDC*)pcf->data; 6274 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6275 pcbddcf->mat_graph = pcbddc->mat_graph; 6276 pcbddcf->use_faces = PETSC_TRUE; 6277 pcbddcf->use_change_of_basis = PETSC_TRUE; 6278 pcbddcf->use_change_on_faces = PETSC_TRUE; 6279 pcbddcf->use_qr_single = PETSC_TRUE; 6280 pcbddcf->fake_change = PETSC_TRUE; 6281 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6282 /* store information on primal vertices and change of basis (in local numbering) */ 6283 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6284 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6285 change = pcbddcf->ConstraintMatrix; 6286 pcbddcf->ConstraintMatrix = NULL; 6287 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6288 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6289 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6290 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6291 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6292 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6293 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6294 pcf->ops->destroy = NULL; 6295 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6296 } 6297 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6298 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6299 ierr = MatDestroy(&change);CHKERRQ(ierr); 6300 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6301 } 6302 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6303 6304 /* free adjacency */ 6305 if (free_used_adj) { 6306 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6307 } 6308 PetscFunctionReturn(0); 6309 } 6310 6311 #undef __FUNCT__ 6312 #define __FUNCT__ "PCBDDCInitSubSchurs" 6313 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6314 { 6315 PC_IS *pcis=(PC_IS*)pc->data; 6316 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6317 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6318 PCBDDCGraph graph; 6319 PetscErrorCode ierr; 6320 6321 PetscFunctionBegin; 6322 /* attach interface graph for determining subsets */ 6323 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6324 IS verticesIS,verticescomm; 6325 PetscInt vsize,*idxs; 6326 6327 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6328 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6329 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6330 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6331 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6332 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6333 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6334 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6335 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6336 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6337 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6338 /* 6339 if (pcbddc->dbg_flag) { 6340 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6341 } 6342 */ 6343 } else { 6344 graph = pcbddc->mat_graph; 6345 } 6346 6347 /* sub_schurs init */ 6348 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6349 6350 /* free graph struct */ 6351 if (pcbddc->sub_schurs_rebuild) { 6352 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6353 } 6354 PetscFunctionReturn(0); 6355 } 6356 6357 #undef __FUNCT__ 6358 #define __FUNCT__ "PCBDDCCheckOperator" 6359 PetscErrorCode PCBDDCCheckOperator(PC pc) 6360 { 6361 PC_IS *pcis=(PC_IS*)pc->data; 6362 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6363 PetscErrorCode ierr; 6364 6365 PetscFunctionBegin; 6366 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6367 IS zerodiag = NULL; 6368 Mat S_j,B0_B=NULL; 6369 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6370 PetscScalar *p0_check,*array,*array2; 6371 PetscReal norm; 6372 PetscInt i; 6373 6374 /* B0 and B0_B */ 6375 if (zerodiag) { 6376 IS dummy; 6377 6378 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6379 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6380 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6381 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6382 } 6383 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6384 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6385 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6386 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6387 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6388 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6389 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6390 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6391 /* S_j */ 6392 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6393 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6394 6395 /* mimic vector in \widetilde{W}_\Gamma */ 6396 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6397 /* continuous in primal space */ 6398 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6399 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6400 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6401 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6402 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6403 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6404 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6405 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6406 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6407 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6408 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6409 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6410 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6411 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6412 6413 /* assemble rhs for coarse problem */ 6414 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6415 /* local with Schur */ 6416 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6417 if (zerodiag) { 6418 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6419 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6420 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6421 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6422 } 6423 /* sum on primal nodes the local contributions */ 6424 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6425 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6426 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6427 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6428 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6429 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6430 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6431 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6432 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6433 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6434 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6435 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6436 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6437 /* scale primal nodes (BDDC sums contibutions) */ 6438 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6439 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6440 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6441 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6442 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6443 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6444 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6445 /* global: \widetilde{B0}_B w_\Gamma */ 6446 if (zerodiag) { 6447 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6448 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6449 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6450 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6451 } 6452 /* BDDC */ 6453 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6454 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6455 6456 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6457 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6458 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6459 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6460 for (i=0;i<pcbddc->benign_n;i++) { 6461 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6462 } 6463 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6464 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6465 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6466 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6467 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6468 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6469 } 6470 PetscFunctionReturn(0); 6471 } 6472