1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 9 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 10 { 11 PCBDDCBenignMatMult_ctx ctx; 12 PetscErrorCode ierr; 13 PetscBool apply_right,apply_left,reset_x; 14 15 PetscFunctionBegin; 16 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 17 if (transpose) { 18 apply_right = ctx->apply_left; 19 apply_left = ctx->apply_right; 20 } else { 21 apply_right = ctx->apply_right; 22 apply_left = ctx->apply_left; 23 } 24 reset_x = PETSC_FALSE; 25 if (apply_right) { 26 const PetscScalar *ax; 27 PetscInt nl,i; 28 29 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 31 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 32 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 33 for (i=0;i<ctx->benign_n;i++) { 34 PetscScalar sum,val; 35 const PetscInt *idxs; 36 PetscInt nz,j; 37 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 38 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 39 sum = 0.; 40 if (ctx->apply_p0) { 41 val = ctx->work[idxs[nz-1]]; 42 for (j=0;j<nz-1;j++) { 43 sum += ctx->work[idxs[j]]; 44 ctx->work[idxs[j]] += val; 45 } 46 } else { 47 for (j=0;j<nz-1;j++) { 48 sum += ctx->work[idxs[j]]; 49 } 50 } 51 ctx->work[idxs[nz-1]] -= sum; 52 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 53 } 54 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 55 reset_x = PETSC_TRUE; 56 } 57 if (transpose) { 58 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 59 } else { 60 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 61 } 62 if (reset_x) { 63 ierr = VecResetArray(x);CHKERRQ(ierr); 64 } 65 if (apply_left) { 66 PetscScalar *ay; 67 PetscInt i; 68 69 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 70 for (i=0;i<ctx->benign_n;i++) { 71 PetscScalar sum,val; 72 const PetscInt *idxs; 73 PetscInt nz,j; 74 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 75 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 76 val = -ay[idxs[nz-1]]; 77 if (ctx->apply_p0) { 78 sum = 0.; 79 for (j=0;j<nz-1;j++) { 80 sum += ay[idxs[j]]; 81 ay[idxs[j]] += val; 82 } 83 ay[idxs[nz-1]] += sum; 84 } else { 85 for (j=0;j<nz-1;j++) { 86 ay[idxs[j]] += val; 87 } 88 ay[idxs[nz-1]] = 0.; 89 } 90 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 91 } 92 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 99 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 100 { 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 110 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 111 { 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCBenignShellMat" 121 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 122 { 123 PC_IS *pcis = (PC_IS*)pc->data; 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PCBDDCBenignMatMult_ctx ctx; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!restore) { 130 Mat A_IB,A_BI; 131 PetscScalar *work; 132 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 133 134 if (pcbddc->benign_original_mat) { 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 136 } 137 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 138 PetscFunctionReturn(0); 139 } 140 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 141 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 142 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 143 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 146 ierr = PetscNew(&ctx);CHKERRQ(ierr); 147 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 148 ctx->apply_left = PETSC_TRUE; 149 ctx->apply_right = PETSC_FALSE; 150 ctx->apply_p0 = PETSC_FALSE; 151 ctx->benign_n = pcbddc->benign_n; 152 if (reuse) { 153 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 154 ctx->free = PETSC_FALSE; 155 } else { /* TODO: could be optimized for successive solves */ 156 ISLocalToGlobalMapping N_to_D; 157 PetscInt i; 158 159 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 160 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 161 for (i=0;i<pcbddc->benign_n;i++) { 162 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 163 } 164 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 165 ctx->free = PETSC_TRUE; 166 } 167 ctx->A = pcis->A_IB; 168 ctx->work = work; 169 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 170 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 pcis->A_IB = A_IB; 173 174 /* A_BI as A_IB^T */ 175 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 176 pcbddc->benign_original_mat = pcis->A_BI; 177 pcis->A_BI = A_BI; 178 } else { 179 if (!pcbddc->benign_original_mat) { 180 PetscFunctionReturn(0); 181 } 182 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 183 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 184 pcis->A_IB = ctx->A; 185 ctx->A = NULL; 186 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 187 pcis->A_BI = pcbddc->benign_original_mat; 188 pcbddc->benign_original_mat = NULL; 189 if (ctx->free) { 190 PetscInt i; 191 for (i=0;i<ctx->benign_n;i++) { 192 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 193 } 194 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 195 } 196 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 197 ierr = PetscFree(ctx);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* used just in bddc debug mode */ 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignProject" 205 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 206 { 207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 209 Mat An; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 214 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 215 if (is1) { 216 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 217 ierr = MatDestroy(&An);CHKERRQ(ierr); 218 } else { 219 *B = An; 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* TODO: add reuse flag */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSeqAIJCompress" 227 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 228 { 229 Mat Bt; 230 PetscScalar *a,*bdata; 231 const PetscInt *ii,*ij; 232 PetscInt m,n,i,nnz,*bii,*bij; 233 PetscBool flg_row; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 238 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 239 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 240 nnz = n; 241 for (i=0;i<ii[n];i++) { 242 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 243 } 244 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 247 nnz = 0; 248 bii[0] = 0; 249 for (i=0;i<n;i++) { 250 PetscInt j; 251 for (j=ii[i];j<ii[i+1];j++) { 252 PetscScalar entry = a[j]; 253 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 254 bij[nnz] = ij[j]; 255 bdata[nnz] = entry; 256 nnz++; 257 } 258 } 259 bii[i+1] = nnz; 260 } 261 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 262 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 263 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 264 { 265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 } 269 *B = Bt; 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatDetectDisconnectedComponents" 275 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 276 { 277 Mat B; 278 IS is_dummy,*cc_n; 279 ISLocalToGlobalMapping l2gmap_dummy; 280 PCBDDCGraph graph; 281 PetscInt i,n; 282 PetscInt *xadj,*adjncy; 283 PetscInt *xadj_filtered,*adjncy_filtered; 284 PetscBool flg_row,isseqaij; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 if (!A->rmap->N || !A->cmap->N) { 289 *ncc = 0; 290 *cc = NULL; 291 PetscFunctionReturn(0); 292 } 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 294 if (!isseqaij && filter) { 295 PetscBool isseqdense; 296 297 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 298 if (!isseqdense) { 299 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 300 } else { /* TODO: rectangular case and LDA */ 301 PetscScalar *array; 302 PetscReal chop=1.e-6; 303 304 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 305 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 306 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 307 for (i=0;i<n;i++) { 308 PetscInt j; 309 for (j=i+1;j<n;j++) { 310 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 311 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 312 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 313 } 314 } 315 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 316 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 317 } 318 } else { 319 B = A; 320 } 321 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 322 323 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 324 if (filter) { 325 PetscScalar *data; 326 PetscInt j,cum; 327 328 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 329 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 330 cum = 0; 331 for (i=0;i<n;i++) { 332 PetscInt t; 333 334 for (j=xadj[i];j<xadj[i+1];j++) { 335 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 336 continue; 337 } 338 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 339 } 340 t = xadj_filtered[i]; 341 xadj_filtered[i] = cum; 342 cum += t; 343 } 344 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 345 } else { 346 xadj_filtered = NULL; 347 adjncy_filtered = NULL; 348 } 349 350 /* compute local connected components using PCBDDCGraph */ 351 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 353 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 355 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 357 if (xadj_filtered) { 358 graph->xadj = xadj_filtered; 359 graph->adjncy = adjncy_filtered; 360 } else { 361 graph->xadj = xadj; 362 graph->adjncy = adjncy; 363 } 364 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 366 /* partial clean up */ 367 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 368 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 369 if (A != B) { 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 } 372 373 /* get back data */ 374 if (ncc) *ncc = graph->ncc; 375 if (cc) { 376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 377 for (i=0;i<graph->ncc;i++) { 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 379 } 380 *cc = cc_n; 381 } 382 /* clean up graph */ 383 graph->xadj = 0; 384 graph->adjncy = 0; 385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCBenignCheck" 391 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 392 { 393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 394 PC_IS* pcis = (PC_IS*)(pc->data); 395 IS dirIS = NULL; 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 401 if (zerodiag) { 402 Mat A; 403 Vec vec3_N; 404 PetscScalar *vals; 405 const PetscInt *idxs; 406 PetscInt nz,*count; 407 408 /* p0 */ 409 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 410 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 413 for (i=0;i<nz;i++) vals[i] = 1.; 414 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 417 /* v_I */ 418 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 419 for (i=0;i<nz;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 422 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 423 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 424 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 426 if (dirIS) { 427 PetscInt n; 428 429 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 430 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 431 for (i=0;i<n;i++) vals[i] = 0.; 432 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 438 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 439 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 440 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 441 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 442 if (PetscAbsScalar(vals[0]) > 1.e-1) { 443 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 444 } 445 ierr = PetscFree(vals);CHKERRQ(ierr); 446 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 447 448 /* there should not be any pressure dofs lying on the interface */ 449 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 450 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 451 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 452 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 453 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 for (i=0;i<nz;i++) { 455 if (count[idxs[i]]) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 457 } 458 } 459 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 460 ierr = PetscFree(count);CHKERRQ(ierr); 461 } 462 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 463 464 /* check PCBDDCBenignGetOrSetP0 */ 465 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 468 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 469 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 470 for (i=0;i<pcbddc->benign_n;i++) { 471 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 472 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 473 } 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 480 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 481 { 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 IS pressures,zerodiag,*zerodiag_subs; 484 PetscInt nz,n; 485 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 491 for (n=0;n<pcbddc->benign_n;n++) { 492 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 495 pcbddc->benign_n = 0; 496 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 497 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 498 Checks if all the pressure dofs in each subdomain have a zero diagonal 499 If not, a change of basis on pressures is not needed 500 since the local Schur complements are SPD 501 */ 502 has_null_pressures = PETSC_TRUE; 503 have_null = PETSC_TRUE; 504 if (pcbddc->n_ISForDofsLocal) { 505 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 506 507 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 513 if (!sorted) { 514 ierr = ISSort(pressures);CHKERRQ(ierr); 515 } 516 } else { 517 pressures = NULL; 518 } 519 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 520 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 521 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 522 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 523 if (!sorted) { 524 ierr = ISSort(zerodiag);CHKERRQ(ierr); 525 } 526 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 527 if (!nz) { 528 if (n) have_null = PETSC_FALSE; 529 has_null_pressures = PETSC_FALSE; 530 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 531 } 532 recompute_zerodiag = PETSC_FALSE; 533 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 534 zerodiag_subs = NULL; 535 pcbddc->benign_n = 0; 536 if (has_null_pressures) { 537 IS *subs; 538 PetscInt nsubs,i; 539 540 subs = pcbddc->local_subs; 541 nsubs = pcbddc->n_local_subs; 542 if (nsubs > 1) { 543 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 544 for (i=0;i<nsubs;i++) { 545 ISLocalToGlobalMapping l2g; 546 IS t_zerodiag_subs; 547 PetscInt nl; 548 549 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 550 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 551 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 552 if (nl) { 553 PetscBool valid = PETSC_TRUE; 554 555 if (pressures) { 556 IS t_pressure_subs; 557 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 558 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 559 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 560 } 561 if (valid) { 562 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 563 pcbddc->benign_n++; 564 } else { 565 recompute_zerodiag = PETSC_TRUE; 566 } 567 } 568 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 569 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 570 } 571 } else { /* there's just one subdomain (or zero if they have not been detected */ 572 PetscBool valid = PETSC_TRUE; 573 if (pressures) { 574 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 575 } 576 if (valid) { 577 pcbddc->benign_n = 1; 578 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 579 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 580 zerodiag_subs[0] = zerodiag; 581 } 582 } 583 } 584 585 if (!pcbddc->benign_n) { 586 PetscInt n; 587 588 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 589 recompute_zerodiag = PETSC_FALSE; 590 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 591 if (n) { 592 has_null_pressures = PETSC_FALSE; 593 have_null = PETSC_FALSE; 594 } 595 } 596 597 /* final check for null pressures */ 598 if (zerodiag && pressures) { 599 PetscInt nz,np; 600 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 601 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 602 if (nz != np) have_null = PETSC_FALSE; 603 } 604 605 if (recompute_zerodiag) { 606 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 607 if (pcbddc->benign_n == 1) { 608 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 609 zerodiag = zerodiag_subs[0]; 610 } else { 611 PetscInt i,nzn,*new_idxs; 612 613 nzn = 0; 614 for (i=0;i<pcbddc->benign_n;i++) { 615 PetscInt ns; 616 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 617 nzn += ns; 618 } 619 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 620 nzn = 0; 621 for (i=0;i<pcbddc->benign_n;i++) { 622 PetscInt ns,*idxs; 623 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 624 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 625 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 626 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 627 nzn += ns; 628 } 629 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 630 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 631 } 632 have_null = PETSC_FALSE; 633 } 634 635 /* no-net-flux */ 636 if (pcbddc->benign_compute_nonetflux) { 637 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 638 MatNullSpace near_null_space; 639 ISLocalToGlobalMapping rmap; 640 Vec quad_vec; 641 PetscScalar *pvals; 642 PetscInt i,np,*dummyins; 643 644 /* create vector to hold quadrature weights */ 645 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 646 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 647 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 648 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 649 650 /* compute B^{(i)T} * 1_p */ 651 np = 0; 652 if (zerodiag) { 653 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 654 } else if (pressures) { 655 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 656 } 657 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 658 for (i=0;i<np;i++) pvals[i] = 1.; 659 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 660 if (np) { 661 const PetscInt *pidxs; 662 663 if (zerodiag) { 664 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 669 if (zerodiag) { 670 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 671 } else if (pressures) { 672 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 673 } 674 } 675 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 676 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 677 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 678 ierr = PetscFree(pvals);CHKERRQ(ierr); 679 680 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 681 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 682 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 683 for (i=0;i<matis->sf->nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 684 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 685 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 686 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 687 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 688 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 689 for (i=0;i<matis->sf->nleaves;i++) { 690 if (PetscGlobalRank != matis->sf_leafdata[i]) { 691 pvals[i] = 0.; 692 } 693 } 694 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 695 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 696 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 697 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 698 ierr = PetscFree(dummyins);CHKERRQ(ierr); 699 700 /* assembly quadrature vec and attach near null space to pmat */ 701 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 702 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 703 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 704 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 705 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 706 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 707 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 708 } 709 710 /* change of basis and p0 dofs */ 711 if (has_null_pressures) { 712 IS zerodiagc; 713 const PetscInt *idxs,*idxsc; 714 PetscInt i,s,*nnz; 715 716 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 717 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 718 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 719 /* local change of basis for pressures */ 720 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 721 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 722 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 723 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 724 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 725 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 726 for (i=0;i<pcbddc->benign_n;i++) { 727 PetscInt nzs,j; 728 729 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 730 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 731 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 732 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 733 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 734 } 735 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 736 ierr = PetscFree(nnz);CHKERRQ(ierr); 737 /* set identity on velocities */ 738 for (i=0;i<n-nz;i++) { 739 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 740 } 741 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 742 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 743 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 744 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 745 /* set change on pressures */ 746 for (s=0;s<pcbddc->benign_n;s++) { 747 PetscScalar *array; 748 PetscInt nzs; 749 750 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 751 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 752 for (i=0;i<nzs-1;i++) { 753 PetscScalar vals[2]; 754 PetscInt cols[2]; 755 756 cols[0] = idxs[i]; 757 cols[1] = idxs[nzs-1]; 758 vals[0] = 1.; 759 vals[1] = 1.; 760 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 761 } 762 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 763 for (i=0;i<nzs-1;i++) array[i] = -1.; 764 array[nzs-1] = 1.; 765 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 766 /* store local idxs for p0 */ 767 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 768 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 769 ierr = PetscFree(array);CHKERRQ(ierr); 770 } 771 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 772 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 /* project if needed */ 774 if (pcbddc->benign_change_explicit) { 775 Mat M; 776 777 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 780 ierr = MatDestroy(&M);CHKERRQ(ierr); 781 } 782 /* store global idxs for p0 */ 783 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 784 } 785 pcbddc->benign_zerodiag_subs = zerodiag_subs; 786 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 787 788 /* determines if the coarse solver will be singular or not */ 789 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 790 *zerodiaglocal = zerodiag; 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 796 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 797 { 798 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 799 PetscScalar *array; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 if (!pcbddc->benign_sf) { 804 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 805 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 806 } 807 if (get) { 808 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 809 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 810 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 811 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 } else { 813 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 814 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 815 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 816 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 817 } 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 823 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 824 { 825 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 826 PetscErrorCode ierr; 827 828 PetscFunctionBegin; 829 /* TODO: add error checking 830 - avoid nested pop (or push) calls. 831 - cannot push before pop. 832 - cannot call this if pcbddc->local_mat is NULL 833 */ 834 if (!pcbddc->benign_n) { 835 PetscFunctionReturn(0); 836 } 837 if (pop) { 838 if (pcbddc->benign_change_explicit) { 839 IS is_p0; 840 MatReuse reuse; 841 842 /* extract B_0 */ 843 reuse = MAT_INITIAL_MATRIX; 844 if (pcbddc->benign_B0) { 845 reuse = MAT_REUSE_MATRIX; 846 } 847 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 848 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 849 /* remove rows and cols from local problem */ 850 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 851 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 852 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 854 } else { 855 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 856 PetscScalar *vals; 857 PetscInt i,n,*idxs_ins; 858 859 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 860 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 861 if (!pcbddc->benign_B0) { 862 PetscInt *nnz; 863 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 864 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 865 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 866 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 867 for (i=0;i<pcbddc->benign_n;i++) { 868 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 869 nnz[i] = n - nnz[i]; 870 } 871 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 872 ierr = PetscFree(nnz);CHKERRQ(ierr); 873 } 874 875 for (i=0;i<pcbddc->benign_n;i++) { 876 PetscScalar *array; 877 PetscInt *idxs,j,nz,cum; 878 879 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 881 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 882 for (j=0;j<nz;j++) vals[j] = 1.; 883 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 884 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 885 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 886 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 887 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 888 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 889 cum = 0; 890 for (j=0;j<n;j++) { 891 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 892 vals[cum] = array[j]; 893 idxs_ins[cum] = j; 894 cum++; 895 } 896 } 897 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 898 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 899 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 900 } 901 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 902 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 903 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 904 } 905 } else { /* push */ 906 if (pcbddc->benign_change_explicit) { 907 PetscInt i; 908 909 for (i=0;i<pcbddc->benign_n;i++) { 910 PetscScalar *B0_vals; 911 PetscInt *B0_cols,B0_ncol; 912 913 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 914 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 915 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 916 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 917 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 918 } 919 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921 } else { 922 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCAdaptiveSelection" 930 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 931 { 932 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 933 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 934 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 935 PetscBLASInt *B_iwork,*B_ifail; 936 PetscScalar *work,lwork; 937 PetscScalar *St,*S,*eigv; 938 PetscScalar *Sarray,*Starray; 939 PetscReal *eigs,thresh; 940 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 941 PetscBool allocated_S_St; 942 #if defined(PETSC_USE_COMPLEX) 943 PetscReal *rwork; 944 #endif 945 PetscErrorCode ierr; 946 947 PetscFunctionBegin; 948 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 949 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 950 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 955 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 } 957 958 if (pcbddc->dbg_flag) { 959 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 960 } 961 962 /* max size of subsets */ 963 mss = 0; 964 for (i=0;i<sub_schurs->n_subs;i++) { 965 PetscInt subset_size; 966 967 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 968 mss = PetscMax(mss,subset_size); 969 } 970 971 /* min/max and threshold */ 972 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 973 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 974 nmax = PetscMax(nmin,nmax); 975 allocated_S_St = PETSC_FALSE; 976 if (nmin) { 977 allocated_S_St = PETSC_TRUE; 978 } 979 980 /* allocate lapack workspace */ 981 cum = cum2 = 0; 982 maxneigs = 0; 983 for (i=0;i<sub_schurs->n_subs;i++) { 984 PetscInt n,subset_size; 985 986 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 987 n = PetscMin(subset_size,nmax); 988 cum += subset_size; 989 cum2 += subset_size*n; 990 maxneigs = PetscMax(maxneigs,n); 991 } 992 if (mss) { 993 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 994 PetscBLASInt B_itype = 1; 995 PetscBLASInt B_N = mss; 996 PetscReal zero = 0.0; 997 PetscReal eps = 0.0; /* dlamch? */ 998 999 B_lwork = -1; 1000 S = NULL; 1001 St = NULL; 1002 eigs = NULL; 1003 eigv = NULL; 1004 B_iwork = NULL; 1005 B_ifail = NULL; 1006 #if defined(PETSC_USE_COMPLEX) 1007 rwork = NULL; 1008 #endif 1009 thresh = 1.0; 1010 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1011 #if defined(PETSC_USE_COMPLEX) 1012 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1013 #else 1014 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1015 #endif 1016 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1018 } else { 1019 /* TODO */ 1020 } 1021 } else { 1022 lwork = 0; 1023 } 1024 1025 nv = 0; 1026 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1027 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1028 } 1029 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1030 if (allocated_S_St) { 1031 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1032 } 1033 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1034 #if defined(PETSC_USE_COMPLEX) 1035 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1036 #endif 1037 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1039 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1040 nv+cum,&pcbddc->adaptive_constraints_idxs, 1041 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1042 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1043 1044 maxneigs = 0; 1045 cum = cumarray = 0; 1046 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1047 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1048 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1049 const PetscInt *idxs; 1050 1051 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1052 for (cum=0;cum<nv;cum++) { 1053 pcbddc->adaptive_constraints_n[cum] = 1; 1054 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1055 pcbddc->adaptive_constraints_data[cum] = 1.0; 1056 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1057 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1058 } 1059 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1060 } 1061 1062 if (mss) { /* multilevel */ 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1064 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1065 } 1066 1067 thresh = pcbddc->adaptive_threshold; 1068 for (i=0;i<sub_schurs->n_subs;i++) { 1069 const PetscInt *idxs; 1070 PetscReal upper,lower; 1071 PetscInt j,subset_size,eigs_start = 0; 1072 PetscBLASInt B_N; 1073 PetscBool same_data = PETSC_FALSE; 1074 1075 if (pcbddc->use_deluxe_scaling) { 1076 upper = PETSC_MAX_REAL; 1077 lower = thresh; 1078 } else { 1079 upper = 1./thresh; 1080 lower = 0.; 1081 } 1082 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1083 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1084 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1085 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1086 if (sub_schurs->is_hermitian) { 1087 PetscInt j,k; 1088 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1089 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1090 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1091 } 1092 for (j=0;j<subset_size;j++) { 1093 for (k=j;k<subset_size;k++) { 1094 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1095 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1096 } 1097 } 1098 } else { 1099 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 } else { 1103 S = Sarray + cumarray; 1104 St = Starray + cumarray; 1105 } 1106 /* see if we can save some work */ 1107 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1108 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1109 } 1110 1111 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1112 B_neigs = 0; 1113 } else { 1114 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1115 PetscBLASInt B_itype = 1; 1116 PetscBLASInt B_IL, B_IU; 1117 PetscReal eps = -1.0; /* dlamch? */ 1118 PetscInt nmin_s; 1119 PetscBool compute_range = PETSC_FALSE; 1120 1121 if (pcbddc->dbg_flag) { 1122 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1123 } 1124 1125 compute_range = PETSC_FALSE; 1126 if (thresh > 1.+PETSC_SMALL && !same_data) { 1127 compute_range = PETSC_TRUE; 1128 } 1129 1130 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1131 if (compute_range) { 1132 1133 /* ask for eigenvalues larger than thresh */ 1134 #if defined(PETSC_USE_COMPLEX) 1135 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1136 #else 1137 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1138 #endif 1139 } else if (!same_data) { 1140 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1141 B_IL = 1; 1142 #if defined(PETSC_USE_COMPLEX) 1143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1144 #else 1145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1146 #endif 1147 } else { /* same_data is true, so get the adaptive function requested by the user */ 1148 PetscInt k; 1149 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1150 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1151 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1152 nmin = nmax; 1153 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1154 for (k=0;k<nmax;k++) { 1155 eigs[k] = 1./PETSC_SMALL; 1156 eigv[k*(subset_size+1)] = 1.0; 1157 } 1158 } 1159 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1160 if (B_ierr) { 1161 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1162 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1163 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1164 } 1165 1166 if (B_neigs > nmax) { 1167 if (pcbddc->dbg_flag) { 1168 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1169 } 1170 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1171 B_neigs = nmax; 1172 } 1173 1174 nmin_s = PetscMin(nmin,B_N); 1175 if (B_neigs < nmin_s) { 1176 PetscBLASInt B_neigs2; 1177 1178 if (pcbddc->use_deluxe_scaling) { 1179 B_IL = B_N - nmin_s + 1; 1180 B_IU = B_N - B_neigs; 1181 } else { 1182 B_IL = B_neigs + 1; 1183 B_IU = nmin_s; 1184 } 1185 if (pcbddc->dbg_flag) { 1186 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1187 } 1188 if (sub_schurs->is_hermitian) { 1189 PetscInt j; 1190 for (j=0;j<subset_size;j++) { 1191 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1192 } 1193 for (j=0;j<subset_size;j++) { 1194 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1195 } 1196 } else { 1197 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1198 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1199 } 1200 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1201 #if defined(PETSC_USE_COMPLEX) 1202 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1203 #else 1204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1205 #endif 1206 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1207 B_neigs += B_neigs2; 1208 } 1209 if (B_ierr) { 1210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1213 } 1214 if (pcbddc->dbg_flag) { 1215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1216 for (j=0;j<B_neigs;j++) { 1217 if (eigs[j] == 0.0) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1219 } else { 1220 if (pcbddc->use_deluxe_scaling) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 } else { 1229 /* TODO */ 1230 } 1231 } 1232 /* change the basis back to the original one */ 1233 if (sub_schurs->change) { 1234 Mat change,phi,phit; 1235 1236 if (pcbddc->dbg_flag > 1) { 1237 PetscInt ii; 1238 for (ii=0;ii<B_neigs;ii++) { 1239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1240 for (j=0;j<B_N;j++) { 1241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1242 } 1243 } 1244 } 1245 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1246 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1247 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1248 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1249 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1250 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1251 } 1252 maxneigs = PetscMax(B_neigs,maxneigs); 1253 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1254 if (B_neigs) { 1255 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1256 1257 if (pcbddc->dbg_flag > 1) { 1258 PetscInt ii; 1259 for (ii=0;ii<B_neigs;ii++) { 1260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1261 for (j=0;j<B_N;j++) { 1262 #if defined(PETSC_USE_COMPLEX) 1263 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1264 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1268 #endif 1269 } 1270 } 1271 } 1272 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1273 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1274 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1275 cum++; 1276 } 1277 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1278 /* shift for next computation */ 1279 cumarray += subset_size*subset_size; 1280 } 1281 if (pcbddc->dbg_flag) { 1282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1283 } 1284 1285 if (mss) { 1286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1287 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1288 /* destroy matrices (junk) */ 1289 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1290 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1291 } 1292 if (allocated_S_St) { 1293 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1296 #if defined(PETSC_USE_COMPLEX) 1297 ierr = PetscFree(rwork);CHKERRQ(ierr); 1298 #endif 1299 if (pcbddc->dbg_flag) { 1300 PetscInt maxneigs_r; 1301 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1302 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "PCBDDCSetUpSolvers" 1309 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1310 { 1311 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1312 PetscScalar *coarse_submat_vals; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1317 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1318 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1319 1320 /* Setup local neumann solver ksp_R */ 1321 /* PCBDDCSetUpLocalScatters should be called first! */ 1322 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1323 1324 /* Change global null space passed in by the user if change of basis has been requested */ 1325 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1326 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1327 } 1328 1329 /* 1330 Setup local correction and local part of coarse basis. 1331 Gives back the dense local part of the coarse matrix in column major ordering 1332 */ 1333 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1334 1335 /* Compute total number of coarse nodes and setup coarse solver */ 1336 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1337 1338 /* free */ 1339 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "PCBDDCResetCustomization" 1345 PetscErrorCode PCBDDCResetCustomization(PC pc) 1346 { 1347 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1353 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1354 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1357 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1358 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1359 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1360 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1361 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1362 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "PCBDDCResetTopography" 1368 PetscErrorCode PCBDDCResetTopography(PC pc) 1369 { 1370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1371 PetscInt i; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1376 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1377 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1378 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1380 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1381 for (i=0;i<pcbddc->n_local_subs;i++) { 1382 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1385 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "PCBDDCResetSolvers" 1391 PetscErrorCode PCBDDCResetSolvers(PC pc) 1392 { 1393 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1398 if (pcbddc->coarse_phi_B) { 1399 PetscScalar *array; 1400 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1401 ierr = PetscFree(array);CHKERRQ(ierr); 1402 } 1403 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1404 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1405 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1406 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1407 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1408 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1422 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1423 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1424 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1425 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1426 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1427 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1429 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1430 if (pcbddc->benign_zerodiag_subs) { 1431 PetscInt i; 1432 for (i=0;i<pcbddc->benign_n;i++) { 1433 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1436 } 1437 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1443 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1444 { 1445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1446 PC_IS *pcis = (PC_IS*)pc->data; 1447 VecType impVecType; 1448 PetscInt n_constraints,n_R,old_size; 1449 PetscErrorCode ierr; 1450 1451 PetscFunctionBegin; 1452 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1453 /* get sizes */ 1454 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1455 n_R = pcis->n - pcbddc->n_vertices; 1456 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1457 /* local work vectors (try to avoid unneeded work)*/ 1458 /* R nodes */ 1459 old_size = -1; 1460 if (pcbddc->vec1_R) { 1461 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1462 } 1463 if (n_R != old_size) { 1464 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1465 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1467 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1468 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1469 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1470 } 1471 /* local primal dofs */ 1472 old_size = -1; 1473 if (pcbddc->vec1_P) { 1474 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1475 } 1476 if (pcbddc->local_primal_size != old_size) { 1477 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1479 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1480 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1481 } 1482 /* local explicit constraints */ 1483 old_size = -1; 1484 if (pcbddc->vec1_C) { 1485 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1486 } 1487 if (n_constraints && n_constraints != old_size) { 1488 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1489 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1490 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1491 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "PCBDDCSetUpCorrection" 1498 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1499 { 1500 PetscErrorCode ierr; 1501 /* pointers to pcis and pcbddc */ 1502 PC_IS* pcis = (PC_IS*)pc->data; 1503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1504 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1505 /* submatrices of local problem */ 1506 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1507 /* submatrices of local coarse problem */ 1508 Mat S_VV,S_CV,S_VC,S_CC; 1509 /* working matrices */ 1510 Mat C_CR; 1511 /* additional working stuff */ 1512 PC pc_R; 1513 Mat F; 1514 Vec dummy_vec; 1515 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1516 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1517 PetscScalar *work; 1518 PetscInt *idx_V_B; 1519 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1520 PetscInt i,n_R,n_D,n_B; 1521 1522 /* some shortcuts to scalars */ 1523 PetscScalar one=1.0,m_one=-1.0; 1524 1525 PetscFunctionBegin; 1526 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1527 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1528 } 1529 1530 /* Set Non-overlapping dimensions */ 1531 n_vertices = pcbddc->n_vertices; 1532 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1533 n_B = pcis->n_B; 1534 n_D = pcis->n - n_B; 1535 n_R = pcis->n - n_vertices; 1536 1537 /* vertices in boundary numbering */ 1538 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1539 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1540 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1541 1542 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1543 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1544 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1545 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1547 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1548 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1549 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1550 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1551 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1552 1553 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1554 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1555 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1558 lda_rhs = n_R; 1559 need_benign_correction = PETSC_FALSE; 1560 if (isLU || isILU || isCHOL) { 1561 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1562 } else if (sub_schurs->reuse_solver) { 1563 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1564 MatFactorType type; 1565 1566 F = reuse_solver->F; 1567 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1568 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1569 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1570 need_benign_correction = !!reuse_solver->benign_n; 1571 } else { 1572 F = NULL; 1573 } 1574 1575 /* allocate workspace */ 1576 n = 0; 1577 if (n_constraints) { 1578 n += lda_rhs*n_constraints; 1579 } 1580 if (n_vertices) { 1581 n = PetscMax(2*lda_rhs*n_vertices,n); 1582 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1583 } 1584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1585 1586 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1587 dummy_vec = NULL; 1588 if (need_benign_correction && lda_rhs != n_R && F) { 1589 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1590 } 1591 1592 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1593 if (n_constraints) { 1594 Mat M1,M2,M3,C_B; 1595 IS is_aux; 1596 PetscScalar *array,*array2; 1597 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1600 1601 /* Extract constraints on R nodes: C_{CR} */ 1602 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1605 1606 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1607 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1608 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1609 for (i=0;i<n_constraints;i++) { 1610 const PetscScalar *row_cmat_values; 1611 const PetscInt *row_cmat_indices; 1612 PetscInt size_of_constraint,j; 1613 1614 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1615 for (j=0;j<size_of_constraint;j++) { 1616 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1617 } 1618 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1619 } 1620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1621 if (F) { 1622 Mat B; 1623 1624 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1625 if (need_benign_correction) { 1626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1627 1628 /* rhs is already zero on interior dofs, no need to change the rhs */ 1629 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1630 } 1631 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1632 if (need_benign_correction) { 1633 PetscScalar *marr; 1634 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1635 1636 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1637 if (lda_rhs != n_R) { 1638 for (i=0;i<n_constraints;i++) { 1639 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1640 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1641 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1642 } 1643 } else { 1644 for (i=0;i<n_constraints;i++) { 1645 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1646 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1647 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1648 } 1649 } 1650 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1651 } 1652 ierr = MatDestroy(&B);CHKERRQ(ierr); 1653 } else { 1654 PetscScalar *marr; 1655 1656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1657 for (i=0;i<n_constraints;i++) { 1658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1663 } 1664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1665 } 1666 if (!pcbddc->switch_static) { 1667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1670 for (i=0;i<n_constraints;i++) { 1671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1677 } 1678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1680 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1681 } else { 1682 if (lda_rhs != n_R) { 1683 IS dummy; 1684 1685 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1686 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1687 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1690 pcbddc->local_auxmat2 = local_auxmat2_R; 1691 } 1692 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1693 } 1694 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1695 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1696 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1697 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1698 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1699 if (isCHOL) { 1700 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1701 } else { 1702 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1703 } 1704 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1705 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1706 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1707 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1708 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1709 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1710 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1712 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1713 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1714 } 1715 1716 /* Get submatrices from subdomain matrix */ 1717 if (n_vertices) { 1718 IS is_aux; 1719 1720 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1721 IS tis; 1722 1723 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1724 ierr = ISSort(tis);CHKERRQ(ierr); 1725 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1726 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1727 } else { 1728 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 } 1730 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1732 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1733 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1734 } 1735 1736 /* Matrix of coarse basis functions (local) */ 1737 if (pcbddc->coarse_phi_B) { 1738 PetscInt on_B,on_primal,on_D=n_D; 1739 if (pcbddc->coarse_phi_D) { 1740 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1741 } 1742 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1743 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1744 PetscScalar *marray; 1745 1746 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1747 ierr = PetscFree(marray);CHKERRQ(ierr); 1748 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1749 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1750 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1752 } 1753 } 1754 1755 if (!pcbddc->coarse_phi_B) { 1756 PetscScalar *marray; 1757 1758 n = n_B*pcbddc->local_primal_size; 1759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1760 n += n_D*pcbddc->local_primal_size; 1761 } 1762 if (!pcbddc->symmetric_primal) { 1763 n *= 2; 1764 } 1765 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1767 n = n_B*pcbddc->local_primal_size; 1768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1770 n += n_D*pcbddc->local_primal_size; 1771 } 1772 if (!pcbddc->symmetric_primal) { 1773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1775 n = n_B*pcbddc->local_primal_size; 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1777 } 1778 } else { 1779 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1780 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1784 } 1785 } 1786 } 1787 1788 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1789 p0_lidx_I = NULL; 1790 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1791 const PetscInt *idxs; 1792 1793 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1795 for (i=0;i<pcbddc->benign_n;i++) { 1796 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1797 } 1798 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1799 } 1800 1801 /* vertices */ 1802 if (n_vertices) { 1803 1804 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1805 1806 if (n_R) { 1807 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1808 PetscBLASInt B_N,B_one = 1; 1809 PetscScalar *x,*y; 1810 PetscBool isseqaij; 1811 1812 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 ISLocalToGlobalMapping RtoN; 1815 IS is_p0; 1816 PetscInt *idxs_p0,n; 1817 1818 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1819 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1820 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1821 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1822 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1823 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1824 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1825 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1826 } 1827 1828 if (lda_rhs == n_R) { 1829 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1830 } else { 1831 PetscScalar *av,*array; 1832 const PetscInt *xadj,*adjncy; 1833 PetscInt n; 1834 PetscBool flg_row; 1835 1836 array = work+lda_rhs*n_vertices; 1837 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1838 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1839 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1840 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1841 for (i=0;i<n;i++) { 1842 PetscInt j; 1843 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1844 } 1845 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1848 } 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1850 if (need_benign_correction) { 1851 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1852 PetscScalar *marr; 1853 1854 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1855 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1856 1857 | 0 0 0 | (V) 1858 L = | 0 0 -1 | (P-p0) 1859 | 0 0 -1 | (p0) 1860 1861 */ 1862 for (i=0;i<reuse_solver->benign_n;i++) { 1863 const PetscScalar *vals; 1864 const PetscInt *idxs,*idxs_zero; 1865 PetscInt n,j,nz; 1866 1867 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1868 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1869 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1870 for (j=0;j<n;j++) { 1871 PetscScalar val = vals[j]; 1872 PetscInt k,col = idxs[j]; 1873 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1874 } 1875 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1876 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1877 } 1878 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1879 } 1880 if (F) { 1881 /* need to correct the rhs */ 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 PetscScalar *marr; 1885 1886 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1887 if (lda_rhs != n_R) { 1888 for (i=0;i<n_vertices;i++) { 1889 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1890 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1891 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1892 } 1893 } else { 1894 for (i=0;i<n_vertices;i++) { 1895 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 } 1899 } 1900 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1901 } 1902 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1903 /* need to correct the solution */ 1904 if (need_benign_correction) { 1905 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1906 PetscScalar *marr; 1907 1908 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1909 if (lda_rhs != n_R) { 1910 for (i=0;i<n_vertices;i++) { 1911 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1912 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1913 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1914 } 1915 } else { 1916 for (i=0;i<n_vertices;i++) { 1917 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1918 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1920 } 1921 } 1922 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1923 } 1924 } else { 1925 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1926 for (i=0;i<n_vertices;i++) { 1927 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1928 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1931 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1932 } 1933 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1934 } 1935 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1936 /* S_VV and S_CV */ 1937 if (n_constraints) { 1938 Mat B; 1939 1940 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1941 for (i=0;i<n_vertices;i++) { 1942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1944 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1945 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1946 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1950 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1951 ierr = MatDestroy(&B);CHKERRQ(ierr); 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1954 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1956 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1957 ierr = MatDestroy(&B);CHKERRQ(ierr); 1958 } 1959 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1960 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1961 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1962 } 1963 if (lda_rhs != n_R) { 1964 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1966 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1967 } 1968 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1969 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1970 if (need_benign_correction) { 1971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1972 PetscScalar *marr,*sums; 1973 1974 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1975 ierr = MatDenseGetArray(S_VVt,&marr); 1976 for (i=0;i<reuse_solver->benign_n;i++) { 1977 const PetscScalar *vals; 1978 const PetscInt *idxs,*idxs_zero; 1979 PetscInt n,j,nz; 1980 1981 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1982 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1983 for (j=0;j<n_vertices;j++) { 1984 PetscInt k; 1985 sums[j] = 0.; 1986 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1987 } 1988 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1989 for (j=0;j<n;j++) { 1990 PetscScalar val = vals[j]; 1991 PetscInt k; 1992 for (k=0;k<n_vertices;k++) { 1993 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1994 } 1995 } 1996 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1997 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1998 } 1999 ierr = PetscFree(sums);CHKERRQ(ierr); 2000 ierr = MatDenseRestoreArray(S_VVt,&marr); 2001 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2002 } 2003 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2005 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2006 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2007 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2008 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2010 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2011 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2012 } else { 2013 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 } 2015 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2016 2017 /* coarse basis functions */ 2018 for (i=0;i<n_vertices;i++) { 2019 PetscScalar *y; 2020 2021 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2022 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2023 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 y[n_B*i+idx_V_B[i]] = 1.0; 2027 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2028 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2029 2030 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2031 PetscInt j; 2032 2033 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2034 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2035 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2036 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2037 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2038 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2039 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2040 } 2041 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2042 } 2043 /* if n_R == 0 the object is not destroyed */ 2044 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2045 } 2046 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2047 2048 if (n_constraints) { 2049 Mat B; 2050 2051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2052 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2053 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2054 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2055 if (n_vertices) { 2056 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2057 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2058 } else { 2059 Mat S_VCt; 2060 2061 if (lda_rhs != n_R) { 2062 ierr = MatDestroy(&B);CHKERRQ(ierr); 2063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2064 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2065 } 2066 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2067 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2068 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2069 } 2070 } 2071 ierr = MatDestroy(&B);CHKERRQ(ierr); 2072 /* coarse basis functions */ 2073 for (i=0;i<n_constraints;i++) { 2074 PetscScalar *y; 2075 2076 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2078 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2084 PetscInt j; 2085 2086 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2087 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2088 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2091 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2092 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2093 } 2094 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2095 } 2096 } 2097 if (n_constraints) { 2098 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2101 2102 /* coarse matrix entries relative to B_0 */ 2103 if (pcbddc->benign_n) { 2104 Mat B0_B,B0_BPHI; 2105 IS is_dummy; 2106 PetscScalar *data; 2107 PetscInt j; 2108 2109 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2110 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2111 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2112 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2113 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2114 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2115 for (j=0;j<pcbddc->benign_n;j++) { 2116 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2117 for (i=0;i<pcbddc->local_primal_size;i++) { 2118 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2119 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2120 } 2121 } 2122 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2123 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2124 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2125 } 2126 2127 /* compute other basis functions for non-symmetric problems */ 2128 if (!pcbddc->symmetric_primal) { 2129 Mat B_V=NULL,B_C=NULL; 2130 PetscScalar *marray; 2131 2132 if (n_constraints) { 2133 Mat S_CCT,C_CRT; 2134 2135 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2136 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2137 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2138 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2139 if (n_vertices) { 2140 Mat S_VCT; 2141 2142 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2143 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2144 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2145 } 2146 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2147 } 2148 if (n_vertices && n_R) { 2149 PetscScalar *av,*marray; 2150 const PetscInt *xadj,*adjncy; 2151 PetscInt n; 2152 PetscBool flg_row; 2153 2154 /* B_V = B_V - A_VR^T */ 2155 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2156 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2157 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2159 for (i=0;i<n;i++) { 2160 PetscInt j; 2161 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2162 } 2163 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2164 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2165 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2166 } 2167 2168 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2169 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2170 for (i=0;i<n_vertices;i++) { 2171 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2172 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2173 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2174 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2175 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2176 } 2177 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2178 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2179 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2180 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2181 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2182 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2183 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2184 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2185 } 2186 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2187 /* coarse basis functions */ 2188 for (i=0;i<pcbddc->local_primal_size;i++) { 2189 PetscScalar *y; 2190 2191 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2192 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2193 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2194 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 if (i<n_vertices) { 2197 y[n_B*i+idx_V_B[i]] = 1.0; 2198 } 2199 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2200 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2201 2202 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2203 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2204 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2205 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2206 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2207 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2208 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2209 } 2210 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2211 } 2212 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2213 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2214 } 2215 /* free memory */ 2216 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2218 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2219 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2221 ierr = PetscFree(work);CHKERRQ(ierr); 2222 if (n_vertices) { 2223 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2224 } 2225 if (n_constraints) { 2226 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2227 } 2228 /* Checking coarse_sub_mat and coarse basis functios */ 2229 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2230 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2231 if (pcbddc->dbg_flag) { 2232 Mat coarse_sub_mat; 2233 Mat AUXMAT,TM1,TM2,TM3,TM4; 2234 Mat coarse_phi_D,coarse_phi_B; 2235 Mat coarse_psi_D,coarse_psi_B; 2236 Mat A_II,A_BB,A_IB,A_BI; 2237 Mat C_B,CPHI; 2238 IS is_dummy; 2239 Vec mones; 2240 MatType checkmattype=MATSEQAIJ; 2241 PetscReal real_value; 2242 2243 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2244 Mat A; 2245 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2247 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2250 ierr = MatDestroy(&A);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2253 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2254 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2255 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2256 } 2257 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2258 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2259 if (!pcbddc->symmetric_primal) { 2260 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2262 } 2263 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2264 2265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2268 if (!pcbddc->symmetric_primal) { 2269 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2271 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 } else { 2282 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2283 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2284 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2285 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 } 2291 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2292 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2293 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2294 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2295 if (pcbddc->benign_n) { 2296 Mat B0_B,B0_BPHI; 2297 PetscScalar *data,*data2; 2298 PetscInt j; 2299 2300 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2301 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2302 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2303 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2304 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2306 for (j=0;j<pcbddc->benign_n;j++) { 2307 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2308 for (i=0;i<pcbddc->local_primal_size;i++) { 2309 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2310 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2311 } 2312 } 2313 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2314 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2315 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2316 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2317 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2318 } 2319 #if 0 2320 { 2321 PetscViewer viewer; 2322 char filename[256]; 2323 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2324 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2325 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2326 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2327 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2328 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2329 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2330 if (save_change) { 2331 Mat phi_B; 2332 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2333 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2334 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2335 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2336 } else { 2337 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2338 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2339 } 2340 if (pcbddc->coarse_phi_D) { 2341 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2342 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2343 } 2344 if (pcbddc->coarse_psi_B) { 2345 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2346 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2347 } 2348 if (pcbddc->coarse_psi_D) { 2349 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2350 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2351 } 2352 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2353 } 2354 #endif 2355 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2356 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2357 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2359 2360 /* check constraints */ 2361 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2362 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2363 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2364 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2365 } else { 2366 PetscScalar *data; 2367 Mat tmat; 2368 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2369 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2370 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2371 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2372 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2373 } 2374 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2375 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2376 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2377 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2379 if (!pcbddc->symmetric_primal) { 2380 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2381 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2382 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2383 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2385 } 2386 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2387 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2388 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2389 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2391 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2392 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2393 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2395 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2396 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2397 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2399 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2400 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2401 if (!pcbddc->symmetric_primal) { 2402 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2404 } 2405 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2406 } 2407 /* get back data */ 2408 *coarse_submat_vals_n = coarse_submat_vals; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2414 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2415 { 2416 Mat *work_mat; 2417 IS isrow_s,iscol_s; 2418 PetscBool rsorted,csorted; 2419 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2424 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2425 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2426 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2427 2428 if (!rsorted) { 2429 const PetscInt *idxs; 2430 PetscInt *idxs_sorted,i; 2431 2432 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2433 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2434 for (i=0;i<rsize;i++) { 2435 idxs_perm_r[i] = i; 2436 } 2437 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2438 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2439 for (i=0;i<rsize;i++) { 2440 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2441 } 2442 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2443 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2444 } else { 2445 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2446 isrow_s = isrow; 2447 } 2448 2449 if (!csorted) { 2450 if (isrow == iscol) { 2451 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2452 iscol_s = isrow_s; 2453 } else { 2454 const PetscInt *idxs; 2455 PetscInt *idxs_sorted,i; 2456 2457 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2458 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_perm_c[i] = i; 2461 } 2462 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2463 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2464 for (i=0;i<csize;i++) { 2465 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2466 } 2467 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2468 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2469 } 2470 } else { 2471 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2472 iscol_s = iscol; 2473 } 2474 2475 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2476 2477 if (!rsorted || !csorted) { 2478 Mat new_mat; 2479 IS is_perm_r,is_perm_c; 2480 2481 if (!rsorted) { 2482 PetscInt *idxs_r,i; 2483 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2484 for (i=0;i<rsize;i++) { 2485 idxs_r[idxs_perm_r[i]] = i; 2486 } 2487 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2488 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2489 } else { 2490 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2491 } 2492 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2493 2494 if (!csorted) { 2495 if (isrow_s == iscol_s) { 2496 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2497 is_perm_c = is_perm_r; 2498 } else { 2499 PetscInt *idxs_c,i; 2500 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2501 for (i=0;i<csize;i++) { 2502 idxs_c[idxs_perm_c[i]] = i; 2503 } 2504 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2505 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2506 } 2507 } else { 2508 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2511 2512 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2513 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2514 work_mat[0] = new_mat; 2515 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2516 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2517 } 2518 2519 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2520 *B = work_mat[0]; 2521 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2522 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2523 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2529 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2530 { 2531 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2532 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2533 Mat new_mat; 2534 IS is_local,is_global; 2535 PetscInt local_size; 2536 PetscBool isseqaij; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2541 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2542 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2543 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2544 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2545 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2546 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2547 2548 /* check */ 2549 if (pcbddc->dbg_flag) { 2550 Vec x,x_change; 2551 PetscReal error; 2552 2553 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2554 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2555 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2556 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2557 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2558 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2561 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2562 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2565 ierr = VecDestroy(&x);CHKERRQ(ierr); 2566 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2567 } 2568 2569 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2570 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2571 if (isseqaij) { 2572 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2573 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2574 } else { 2575 Mat work_mat; 2576 2577 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2578 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2579 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2580 } 2581 if (matis->A->symmetric_set) { 2582 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2583 #if !defined(PETSC_USE_COMPLEX) 2584 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2585 #endif 2586 } 2587 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2593 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2594 { 2595 PC_IS* pcis = (PC_IS*)(pc->data); 2596 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2597 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2598 PetscInt *idx_R_local=NULL; 2599 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2600 PetscInt vbs,bs; 2601 PetscBT bitmask=NULL; 2602 PetscErrorCode ierr; 2603 2604 PetscFunctionBegin; 2605 /* 2606 No need to setup local scatters if 2607 - primal space is unchanged 2608 AND 2609 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2610 AND 2611 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2612 */ 2613 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2614 PetscFunctionReturn(0); 2615 } 2616 /* destroy old objects */ 2617 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2618 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2619 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2620 /* Set Non-overlapping dimensions */ 2621 n_B = pcis->n_B; 2622 n_D = pcis->n - n_B; 2623 n_vertices = pcbddc->n_vertices; 2624 2625 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2626 2627 /* create auxiliary bitmask and allocate workspace */ 2628 if (!sub_schurs->reuse_solver) { 2629 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2630 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2631 for (i=0;i<n_vertices;i++) { 2632 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2633 } 2634 2635 for (i=0, n_R=0; i<pcis->n; i++) { 2636 if (!PetscBTLookup(bitmask,i)) { 2637 idx_R_local[n_R++] = i; 2638 } 2639 } 2640 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2641 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2642 2643 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2644 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2645 } 2646 2647 /* Block code */ 2648 vbs = 1; 2649 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2650 if (bs>1 && !(n_vertices%bs)) { 2651 PetscBool is_blocked = PETSC_TRUE; 2652 PetscInt *vary; 2653 if (!sub_schurs->reuse_solver) { 2654 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2655 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2656 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2657 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2658 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2659 for (i=0; i<pcis->n/bs; i++) { 2660 if (vary[i]!=0 && vary[i]!=bs) { 2661 is_blocked = PETSC_FALSE; 2662 break; 2663 } 2664 } 2665 ierr = PetscFree(vary);CHKERRQ(ierr); 2666 } else { 2667 /* Verify directly the R set */ 2668 for (i=0; i<n_R/bs; i++) { 2669 PetscInt j,node=idx_R_local[bs*i]; 2670 for (j=1; j<bs; j++) { 2671 if (node != idx_R_local[bs*i+j]-j) { 2672 is_blocked = PETSC_FALSE; 2673 break; 2674 } 2675 } 2676 } 2677 } 2678 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2679 vbs = bs; 2680 for (i=0;i<n_R/vbs;i++) { 2681 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2682 } 2683 } 2684 } 2685 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2686 if (sub_schurs->reuse_solver) { 2687 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2688 2689 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2690 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2691 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2692 reuse_solver->is_R = pcbddc->is_R_local; 2693 } else { 2694 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2695 } 2696 2697 /* print some info if requested */ 2698 if (pcbddc->dbg_flag) { 2699 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2705 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2706 } 2707 2708 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2709 if (!sub_schurs->reuse_solver) { 2710 IS is_aux1,is_aux2; 2711 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2712 2713 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2714 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2715 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2716 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2717 for (i=0; i<n_D; i++) { 2718 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2719 } 2720 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2721 for (i=0, j=0; i<n_R; i++) { 2722 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2723 aux_array1[j++] = i; 2724 } 2725 } 2726 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2727 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2728 for (i=0, j=0; i<n_B; i++) { 2729 if (!PetscBTLookup(bitmask,is_indices[i])) { 2730 aux_array2[j++] = i; 2731 } 2732 } 2733 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2734 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2735 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2737 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2738 2739 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2740 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2741 for (i=0, j=0; i<n_R; i++) { 2742 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2743 aux_array1[j++] = i; 2744 } 2745 } 2746 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2747 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2748 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2749 } 2750 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2751 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2752 } else { 2753 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2754 IS tis; 2755 PetscInt schur_size; 2756 2757 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2758 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2759 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2760 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2761 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2762 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2763 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2764 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2765 } 2766 } 2767 PetscFunctionReturn(0); 2768 } 2769 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2773 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2774 { 2775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2776 PC_IS *pcis = (PC_IS*)pc->data; 2777 PC pc_temp; 2778 Mat A_RR; 2779 MatReuse reuse; 2780 PetscScalar m_one = -1.0; 2781 PetscReal value; 2782 PetscInt n_D,n_R; 2783 PetscBool use_exact,use_exact_reduced,issbaij; 2784 PetscErrorCode ierr; 2785 /* prefixes stuff */ 2786 char dir_prefix[256],neu_prefix[256],str_level[16]; 2787 size_t len; 2788 2789 PetscFunctionBegin; 2790 2791 /* compute prefixes */ 2792 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2793 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2794 if (!pcbddc->current_level) { 2795 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2797 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2798 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2799 } else { 2800 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2801 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2802 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2803 len -= 15; /* remove "pc_bddc_coarse_" */ 2804 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2805 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2806 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2807 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2808 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2809 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2810 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2811 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2812 } 2813 2814 /* DIRICHLET PROBLEM */ 2815 if (dirichlet) { 2816 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2817 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2818 if (!sub_schurs->reuse_solver) { 2819 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2820 } 2821 if (pcbddc->dbg_flag) { 2822 Mat A_IIn; 2823 2824 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2825 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2826 pcis->A_II = A_IIn; 2827 } 2828 } 2829 if (pcbddc->local_mat->symmetric_set) { 2830 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2831 } 2832 /* Matrix for Dirichlet problem is pcis->A_II */ 2833 n_D = pcis->n - pcis->n_B; 2834 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2835 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2836 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2837 /* default */ 2838 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2839 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2840 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2841 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2842 if (issbaij) { 2843 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2844 } else { 2845 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2846 } 2847 /* Allow user's customization */ 2848 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2849 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2850 } 2851 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2852 if (sub_schurs->reuse_solver) { 2853 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2854 2855 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2856 } 2857 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2858 if (!n_D) { 2859 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2860 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2861 } 2862 /* Set Up KSP for Dirichlet problem of BDDC */ 2863 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2864 /* set ksp_D into pcis data */ 2865 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2866 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2867 pcis->ksp_D = pcbddc->ksp_D; 2868 } 2869 2870 /* NEUMANN PROBLEM */ 2871 A_RR = 0; 2872 if (neumann) { 2873 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2874 PetscInt ibs,mbs; 2875 PetscBool issbaij; 2876 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2877 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2878 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2879 if (pcbddc->ksp_R) { /* already created ksp */ 2880 PetscInt nn_R; 2881 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2882 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2883 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2884 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2885 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2886 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2887 reuse = MAT_INITIAL_MATRIX; 2888 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2889 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2890 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2891 reuse = MAT_INITIAL_MATRIX; 2892 } else { /* safe to reuse the matrix */ 2893 reuse = MAT_REUSE_MATRIX; 2894 } 2895 } 2896 /* last check */ 2897 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2898 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2899 reuse = MAT_INITIAL_MATRIX; 2900 } 2901 } else { /* first time, so we need to create the matrix */ 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2905 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2906 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2907 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2908 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2909 if (matis->A == pcbddc->local_mat) { 2910 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2911 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2912 } else { 2913 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2914 } 2915 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2916 if (matis->A == pcbddc->local_mat) { 2917 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2918 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2919 } else { 2920 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2921 } 2922 } 2923 /* extract A_RR */ 2924 if (sub_schurs->reuse_solver) { 2925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2926 2927 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2928 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2929 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2930 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2931 } else { 2932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2933 } 2934 } else { 2935 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2936 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2937 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2938 } 2939 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2940 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2941 } 2942 if (pcbddc->local_mat->symmetric_set) { 2943 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2944 } 2945 if (!pcbddc->ksp_R) { /* create object if not present */ 2946 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2947 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2948 /* default */ 2949 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2950 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2951 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2953 if (issbaij) { 2954 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2955 } else { 2956 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2957 } 2958 /* Allow user's customization */ 2959 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2960 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2961 } 2962 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2963 if (!n_R) { 2964 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2965 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2966 } 2967 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2968 /* Reuse solver if it is present */ 2969 if (sub_schurs->reuse_solver) { 2970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2971 2972 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2973 } 2974 /* Set Up KSP for Neumann problem of BDDC */ 2975 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2976 } 2977 2978 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2979 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2980 if (pcbddc->dbg_flag) { 2981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2982 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2984 } 2985 if (dirichlet) { /* Dirichlet */ 2986 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2987 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2988 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2989 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2990 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2991 /* need to be adapted? */ 2992 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2993 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2994 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2995 /* print info */ 2996 if (pcbddc->dbg_flag) { 2997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2999 } 3000 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3001 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3002 } 3003 } 3004 if (neumann) { /* Neumann */ 3005 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3006 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3007 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3008 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3009 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3010 /* need to be adapted? */ 3011 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3012 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3013 /* print info */ 3014 if (pcbddc->dbg_flag) { 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3017 } 3018 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3019 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3020 } 3021 } 3022 } 3023 /* free Neumann problem's matrix */ 3024 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3030 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3031 { 3032 PetscErrorCode ierr; 3033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3034 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3035 3036 PetscFunctionBegin; 3037 if (!sub_schurs->reuse_solver) { 3038 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3039 } 3040 if (!pcbddc->switch_static) { 3041 if (applytranspose && pcbddc->local_auxmat1) { 3042 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3043 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3044 } 3045 if (!sub_schurs->reuse_solver) { 3046 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 } else { 3049 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3050 3051 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3052 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 } 3054 } else { 3055 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3056 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3057 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 if (applytranspose && pcbddc->local_auxmat1) { 3060 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3061 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3064 } 3065 } 3066 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3067 if (applytranspose) { 3068 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3069 } else { 3070 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3071 } 3072 } else { 3073 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3074 3075 if (applytranspose) { 3076 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3077 } else { 3078 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3079 } 3080 } 3081 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3082 if (!pcbddc->switch_static) { 3083 if (!sub_schurs->reuse_solver) { 3084 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 } else { 3087 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3088 3089 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3091 } 3092 if (!applytranspose && pcbddc->local_auxmat1) { 3093 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3094 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3095 } 3096 } else { 3097 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 if (!applytranspose && pcbddc->local_auxmat1) { 3102 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3103 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3104 } 3105 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3108 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 } 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3116 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3117 { 3118 PetscErrorCode ierr; 3119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3120 PC_IS* pcis = (PC_IS*) (pc->data); 3121 const PetscScalar zero = 0.0; 3122 3123 PetscFunctionBegin; 3124 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3125 if (!pcbddc->benign_apply_coarse_only) { 3126 if (applytranspose) { 3127 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3128 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3129 } else { 3130 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } 3133 } else { 3134 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3135 } 3136 3137 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3138 if (pcbddc->benign_n) { 3139 PetscScalar *array; 3140 PetscInt j; 3141 3142 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3143 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3144 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3145 } 3146 3147 /* start communications from local primal nodes to rhs of coarse solver */ 3148 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3149 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3150 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 3152 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3153 /* TODO remove null space when doing multilevel */ 3154 if (pcbddc->coarse_ksp) { 3155 Mat coarse_mat; 3156 Vec rhs,sol; 3157 MatNullSpace nullsp; 3158 3159 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3160 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3161 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3162 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3163 if (nullsp) { 3164 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3165 } 3166 if (applytranspose) { 3167 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3168 } else { 3169 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3170 } 3171 if (nullsp) { 3172 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3173 } 3174 } 3175 3176 /* Local solution on R nodes */ 3177 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3178 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3179 } 3180 /* communications from coarse sol to local primal nodes */ 3181 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3183 3184 /* Sum contributions from the two levels */ 3185 if (!pcbddc->benign_apply_coarse_only) { 3186 if (applytranspose) { 3187 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3188 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3189 } else { 3190 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3191 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3192 } 3193 /* store p0 */ 3194 if (pcbddc->benign_n) { 3195 PetscScalar *array; 3196 PetscInt j; 3197 3198 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3199 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3200 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3201 } 3202 } else { /* expand the coarse solution */ 3203 if (applytranspose) { 3204 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3205 } else { 3206 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3207 } 3208 } 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3214 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3215 { 3216 PetscErrorCode ierr; 3217 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3218 PetscScalar *array; 3219 Vec from,to; 3220 3221 PetscFunctionBegin; 3222 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3223 from = pcbddc->coarse_vec; 3224 to = pcbddc->vec1_P; 3225 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3226 Vec tvec; 3227 3228 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3229 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3230 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3231 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3232 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3234 } 3235 } else { /* from local to global -> put data in coarse right hand side */ 3236 from = pcbddc->vec1_P; 3237 to = pcbddc->coarse_vec; 3238 } 3239 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3245 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3246 { 3247 PetscErrorCode ierr; 3248 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3249 PetscScalar *array; 3250 Vec from,to; 3251 3252 PetscFunctionBegin; 3253 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3254 from = pcbddc->coarse_vec; 3255 to = pcbddc->vec1_P; 3256 } else { /* from local to global -> put data in coarse right hand side */ 3257 from = pcbddc->vec1_P; 3258 to = pcbddc->coarse_vec; 3259 } 3260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3261 if (smode == SCATTER_FORWARD) { 3262 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3263 Vec tvec; 3264 3265 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3266 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3267 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3268 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3269 } 3270 } else { 3271 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3272 ierr = VecResetArray(from);CHKERRQ(ierr); 3273 } 3274 } 3275 PetscFunctionReturn(0); 3276 } 3277 3278 /* uncomment for testing purposes */ 3279 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3282 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3283 { 3284 PetscErrorCode ierr; 3285 PC_IS* pcis = (PC_IS*)(pc->data); 3286 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3287 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3288 /* one and zero */ 3289 PetscScalar one=1.0,zero=0.0; 3290 /* space to store constraints and their local indices */ 3291 PetscScalar *constraints_data; 3292 PetscInt *constraints_idxs,*constraints_idxs_B; 3293 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3294 PetscInt *constraints_n; 3295 /* iterators */ 3296 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3297 /* BLAS integers */ 3298 PetscBLASInt lwork,lierr; 3299 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3300 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3301 /* reuse */ 3302 PetscInt olocal_primal_size,olocal_primal_size_cc; 3303 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3304 /* change of basis */ 3305 PetscBool qr_needed; 3306 PetscBT change_basis,qr_needed_idx; 3307 /* auxiliary stuff */ 3308 PetscInt *nnz,*is_indices; 3309 PetscInt ncc; 3310 /* some quantities */ 3311 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3312 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3313 3314 PetscFunctionBegin; 3315 /* Destroy Mat objects computed previously */ 3316 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3317 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3318 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3319 /* save info on constraints from previous setup (if any) */ 3320 olocal_primal_size = pcbddc->local_primal_size; 3321 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3322 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3323 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3324 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3325 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3326 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3327 3328 /* print some info */ 3329 if (pcbddc->dbg_flag) { 3330 IS vertices; 3331 PetscInt nv,nedges,nfaces; 3332 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3333 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3334 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3335 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3340 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3341 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3342 } 3343 3344 if (!pcbddc->adaptive_selection) { 3345 IS ISForVertices,*ISForFaces,*ISForEdges; 3346 MatNullSpace nearnullsp; 3347 const Vec *nearnullvecs; 3348 Vec *localnearnullsp; 3349 PetscScalar *array; 3350 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3351 PetscBool nnsp_has_cnst; 3352 /* LAPACK working arrays for SVD or POD */ 3353 PetscBool skip_lapack,boolforchange; 3354 PetscScalar *work; 3355 PetscReal *singular_vals; 3356 #if defined(PETSC_USE_COMPLEX) 3357 PetscReal *rwork; 3358 #endif 3359 #if defined(PETSC_MISSING_LAPACK_GESVD) 3360 PetscScalar *temp_basis,*correlation_mat; 3361 #else 3362 PetscBLASInt dummy_int=1; 3363 PetscScalar dummy_scalar=1.; 3364 #endif 3365 3366 /* Get index sets for faces, edges and vertices from graph */ 3367 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3368 /* free unneeded index sets */ 3369 if (!pcbddc->use_vertices) { 3370 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3371 } 3372 if (!pcbddc->use_edges) { 3373 for (i=0;i<n_ISForEdges;i++) { 3374 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3375 } 3376 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3377 n_ISForEdges = 0; 3378 } 3379 if (!pcbddc->use_faces) { 3380 for (i=0;i<n_ISForFaces;i++) { 3381 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3382 } 3383 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3384 n_ISForFaces = 0; 3385 } 3386 3387 #if defined(PETSC_USE_DEBUG) 3388 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3389 Also use_change_of_basis should be consistent among processors */ 3390 if (pcbddc->NullSpace) { 3391 PetscBool tbool[2],gbool[2]; 3392 3393 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3394 pcbddc->use_change_of_basis = PETSC_TRUE; 3395 if (!ISForEdges) { 3396 pcbddc->use_change_on_faces = PETSC_TRUE; 3397 } 3398 } 3399 tbool[0] = pcbddc->use_change_of_basis; 3400 tbool[1] = pcbddc->use_change_on_faces; 3401 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3402 pcbddc->use_change_of_basis = gbool[0]; 3403 pcbddc->use_change_on_faces = gbool[1]; 3404 } 3405 #endif 3406 3407 /* check if near null space is attached to global mat */ 3408 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3409 if (nearnullsp) { 3410 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3411 /* remove any stored info */ 3412 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3413 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3414 /* store information for BDDC solver reuse */ 3415 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3416 pcbddc->onearnullspace = nearnullsp; 3417 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3418 for (i=0;i<nnsp_size;i++) { 3419 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3420 } 3421 } else { /* if near null space is not provided BDDC uses constants by default */ 3422 nnsp_size = 0; 3423 nnsp_has_cnst = PETSC_TRUE; 3424 } 3425 /* get max number of constraints on a single cc */ 3426 max_constraints = nnsp_size; 3427 if (nnsp_has_cnst) max_constraints++; 3428 3429 /* 3430 Evaluate maximum storage size needed by the procedure 3431 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3432 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3433 There can be multiple constraints per connected component 3434 */ 3435 n_vertices = 0; 3436 if (ISForVertices) { 3437 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3438 } 3439 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3440 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3441 3442 total_counts = n_ISForFaces+n_ISForEdges; 3443 total_counts *= max_constraints; 3444 total_counts += n_vertices; 3445 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3446 3447 total_counts = 0; 3448 max_size_of_constraint = 0; 3449 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3450 IS used_is; 3451 if (i<n_ISForEdges) { 3452 used_is = ISForEdges[i]; 3453 } else { 3454 used_is = ISForFaces[i-n_ISForEdges]; 3455 } 3456 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3457 total_counts += j; 3458 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3459 } 3460 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3461 3462 /* get local part of global near null space vectors */ 3463 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3464 for (k=0;k<nnsp_size;k++) { 3465 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3466 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3468 } 3469 3470 /* whether or not to skip lapack calls */ 3471 skip_lapack = PETSC_TRUE; 3472 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3473 3474 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3475 if (!skip_lapack) { 3476 PetscScalar temp_work; 3477 3478 #if defined(PETSC_MISSING_LAPACK_GESVD) 3479 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3480 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3482 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3483 #if defined(PETSC_USE_COMPLEX) 3484 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3485 #endif 3486 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3487 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3488 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3489 lwork = -1; 3490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3491 #if !defined(PETSC_USE_COMPLEX) 3492 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3493 #else 3494 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3495 #endif 3496 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3498 #else /* on missing GESVD */ 3499 /* SVD */ 3500 PetscInt max_n,min_n; 3501 max_n = max_size_of_constraint; 3502 min_n = max_constraints; 3503 if (max_size_of_constraint < max_constraints) { 3504 min_n = max_size_of_constraint; 3505 max_n = max_constraints; 3506 } 3507 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3508 #if defined(PETSC_USE_COMPLEX) 3509 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3510 #endif 3511 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3512 lwork = -1; 3513 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3517 #if !defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3521 #endif 3522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3523 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3524 #endif /* on missing GESVD */ 3525 /* Allocate optimal workspace */ 3526 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3527 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3528 } 3529 /* Now we can loop on constraining sets */ 3530 total_counts = 0; 3531 constraints_idxs_ptr[0] = 0; 3532 constraints_data_ptr[0] = 0; 3533 /* vertices */ 3534 if (n_vertices) { 3535 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3536 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3537 for (i=0;i<n_vertices;i++) { 3538 constraints_n[total_counts] = 1; 3539 constraints_data[total_counts] = 1.0; 3540 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3541 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3542 total_counts++; 3543 } 3544 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3545 n_vertices = total_counts; 3546 } 3547 3548 /* edges and faces */ 3549 total_counts_cc = total_counts; 3550 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3551 IS used_is; 3552 PetscBool idxs_copied = PETSC_FALSE; 3553 3554 if (ncc<n_ISForEdges) { 3555 used_is = ISForEdges[ncc]; 3556 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3557 } else { 3558 used_is = ISForFaces[ncc-n_ISForEdges]; 3559 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3560 } 3561 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3562 3563 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3564 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3565 /* change of basis should not be performed on local periodic nodes */ 3566 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3567 if (nnsp_has_cnst) { 3568 PetscScalar quad_value; 3569 3570 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3571 idxs_copied = PETSC_TRUE; 3572 3573 if (!pcbddc->use_nnsp_true) { 3574 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3575 } else { 3576 quad_value = 1.0; 3577 } 3578 for (j=0;j<size_of_constraint;j++) { 3579 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3580 } 3581 temp_constraints++; 3582 total_counts++; 3583 } 3584 for (k=0;k<nnsp_size;k++) { 3585 PetscReal real_value; 3586 PetscScalar *ptr_to_data; 3587 3588 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3589 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3590 for (j=0;j<size_of_constraint;j++) { 3591 ptr_to_data[j] = array[is_indices[j]]; 3592 } 3593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3594 /* check if array is null on the connected component */ 3595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3596 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3597 if (real_value > 0.0) { /* keep indices and values */ 3598 temp_constraints++; 3599 total_counts++; 3600 if (!idxs_copied) { 3601 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3602 idxs_copied = PETSC_TRUE; 3603 } 3604 } 3605 } 3606 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3607 valid_constraints = temp_constraints; 3608 if (!pcbddc->use_nnsp_true && temp_constraints) { 3609 if (temp_constraints == 1) { /* just normalize the constraint */ 3610 PetscScalar norm,*ptr_to_data; 3611 3612 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3614 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3615 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3616 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3617 } else { /* perform SVD */ 3618 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3619 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3620 3621 #if defined(PETSC_MISSING_LAPACK_GESVD) 3622 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3623 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3624 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3625 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3626 from that computed using LAPACKgesvd 3627 -> This is due to a different computation of eigenvectors in LAPACKheev 3628 -> The quality of the POD-computed basis will be the same */ 3629 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3630 /* Store upper triangular part of correlation matrix */ 3631 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3632 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3633 for (j=0;j<temp_constraints;j++) { 3634 for (k=0;k<j+1;k++) { 3635 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3636 } 3637 } 3638 /* compute eigenvalues and eigenvectors of correlation matrix */ 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3641 #if !defined(PETSC_USE_COMPLEX) 3642 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3643 #else 3644 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3645 #endif 3646 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3647 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3648 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3649 j = 0; 3650 while (j < temp_constraints && singular_vals[j] < tol) j++; 3651 total_counts = total_counts-j; 3652 valid_constraints = temp_constraints-j; 3653 /* scale and copy POD basis into used quadrature memory */ 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3655 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3656 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3660 if (j<temp_constraints) { 3661 PetscInt ii; 3662 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3663 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3664 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3665 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3666 for (k=0;k<temp_constraints-j;k++) { 3667 for (ii=0;ii<size_of_constraint;ii++) { 3668 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3669 } 3670 } 3671 } 3672 #else /* on missing GESVD */ 3673 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3674 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3675 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3677 #if !defined(PETSC_USE_COMPLEX) 3678 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3679 #else 3680 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3681 #endif 3682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3683 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3684 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3685 k = temp_constraints; 3686 if (k > size_of_constraint) k = size_of_constraint; 3687 j = 0; 3688 while (j < k && singular_vals[k-j-1] < tol) j++; 3689 valid_constraints = k-j; 3690 total_counts = total_counts-temp_constraints+valid_constraints; 3691 #endif /* on missing GESVD */ 3692 } 3693 } 3694 /* update pointers information */ 3695 if (valid_constraints) { 3696 constraints_n[total_counts_cc] = valid_constraints; 3697 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3698 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3699 /* set change_of_basis flag */ 3700 if (boolforchange) { 3701 PetscBTSet(change_basis,total_counts_cc); 3702 } 3703 total_counts_cc++; 3704 } 3705 } 3706 /* free workspace */ 3707 if (!skip_lapack) { 3708 ierr = PetscFree(work);CHKERRQ(ierr); 3709 #if defined(PETSC_USE_COMPLEX) 3710 ierr = PetscFree(rwork);CHKERRQ(ierr); 3711 #endif 3712 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3713 #if defined(PETSC_MISSING_LAPACK_GESVD) 3714 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3715 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3716 #endif 3717 } 3718 for (k=0;k<nnsp_size;k++) { 3719 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3720 } 3721 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3722 /* free index sets of faces, edges and vertices */ 3723 for (i=0;i<n_ISForFaces;i++) { 3724 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3725 } 3726 if (n_ISForFaces) { 3727 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3728 } 3729 for (i=0;i<n_ISForEdges;i++) { 3730 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3731 } 3732 if (n_ISForEdges) { 3733 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3734 } 3735 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3736 } else { 3737 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3738 3739 total_counts = 0; 3740 n_vertices = 0; 3741 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3742 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3743 } 3744 max_constraints = 0; 3745 total_counts_cc = 0; 3746 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3747 total_counts += pcbddc->adaptive_constraints_n[i]; 3748 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3749 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3750 } 3751 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3752 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3753 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3754 constraints_data = pcbddc->adaptive_constraints_data; 3755 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3756 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3757 total_counts_cc = 0; 3758 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3759 if (pcbddc->adaptive_constraints_n[i]) { 3760 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3761 } 3762 } 3763 #if 0 3764 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3765 for (i=0;i<total_counts_cc;i++) { 3766 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3767 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3768 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3769 printf(" %d",constraints_idxs[j]); 3770 } 3771 printf("\n"); 3772 printf("number of cc: %d\n",constraints_n[i]); 3773 } 3774 for (i=0;i<n_vertices;i++) { 3775 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3776 } 3777 for (i=0;i<sub_schurs->n_subs;i++) { 3778 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3779 } 3780 #endif 3781 3782 max_size_of_constraint = 0; 3783 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3784 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3785 /* Change of basis */ 3786 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3787 if (pcbddc->use_change_of_basis) { 3788 for (i=0;i<sub_schurs->n_subs;i++) { 3789 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3790 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3791 } 3792 } 3793 } 3794 } 3795 pcbddc->local_primal_size = total_counts; 3796 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3797 3798 /* map constraints_idxs in boundary numbering */ 3799 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3800 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3801 3802 /* Create constraint matrix */ 3803 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3804 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3805 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3806 3807 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3808 /* determine if a QR strategy is needed for change of basis */ 3809 qr_needed = PETSC_FALSE; 3810 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3811 total_primal_vertices=0; 3812 pcbddc->local_primal_size_cc = 0; 3813 for (i=0;i<total_counts_cc;i++) { 3814 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3815 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3816 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3817 pcbddc->local_primal_size_cc += 1; 3818 } else if (PetscBTLookup(change_basis,i)) { 3819 for (k=0;k<constraints_n[i];k++) { 3820 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3821 } 3822 pcbddc->local_primal_size_cc += constraints_n[i]; 3823 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3824 PetscBTSet(qr_needed_idx,i); 3825 qr_needed = PETSC_TRUE; 3826 } 3827 } else { 3828 pcbddc->local_primal_size_cc += 1; 3829 } 3830 } 3831 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3832 pcbddc->n_vertices = total_primal_vertices; 3833 /* permute indices in order to have a sorted set of vertices */ 3834 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3835 3836 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3837 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3838 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3839 3840 /* nonzero structure of constraint matrix */ 3841 /* and get reference dof for local constraints */ 3842 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3843 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3844 3845 j = total_primal_vertices; 3846 total_counts = total_primal_vertices; 3847 cum = total_primal_vertices; 3848 for (i=n_vertices;i<total_counts_cc;i++) { 3849 if (!PetscBTLookup(change_basis,i)) { 3850 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3851 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3852 cum++; 3853 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3854 for (k=0;k<constraints_n[i];k++) { 3855 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3856 nnz[j+k] = size_of_constraint; 3857 } 3858 j += constraints_n[i]; 3859 } 3860 } 3861 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3862 ierr = PetscFree(nnz);CHKERRQ(ierr); 3863 3864 /* set values in constraint matrix */ 3865 for (i=0;i<total_primal_vertices;i++) { 3866 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3867 } 3868 total_counts = total_primal_vertices; 3869 for (i=n_vertices;i<total_counts_cc;i++) { 3870 if (!PetscBTLookup(change_basis,i)) { 3871 PetscInt *cols; 3872 3873 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3874 cols = constraints_idxs+constraints_idxs_ptr[i]; 3875 for (k=0;k<constraints_n[i];k++) { 3876 PetscInt row = total_counts+k; 3877 PetscScalar *vals; 3878 3879 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3880 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3881 } 3882 total_counts += constraints_n[i]; 3883 } 3884 } 3885 /* assembling */ 3886 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 3889 /* 3890 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3891 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3892 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3893 */ 3894 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3895 if (pcbddc->use_change_of_basis) { 3896 /* dual and primal dofs on a single cc */ 3897 PetscInt dual_dofs,primal_dofs; 3898 /* working stuff for GEQRF */ 3899 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3900 PetscBLASInt lqr_work; 3901 /* working stuff for UNGQR */ 3902 PetscScalar *gqr_work,lgqr_work_t; 3903 PetscBLASInt lgqr_work; 3904 /* working stuff for TRTRS */ 3905 PetscScalar *trs_rhs; 3906 PetscBLASInt Blas_NRHS; 3907 /* pointers for values insertion into change of basis matrix */ 3908 PetscInt *start_rows,*start_cols; 3909 PetscScalar *start_vals; 3910 /* working stuff for values insertion */ 3911 PetscBT is_primal; 3912 PetscInt *aux_primal_numbering_B; 3913 /* matrix sizes */ 3914 PetscInt global_size,local_size; 3915 /* temporary change of basis */ 3916 Mat localChangeOfBasisMatrix; 3917 /* extra space for debugging */ 3918 PetscScalar *dbg_work; 3919 3920 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3921 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3922 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3923 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3924 /* nonzeros for local mat */ 3925 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3926 if (!pcbddc->benign_change || pcbddc->fake_change) { 3927 for (i=0;i<pcis->n;i++) nnz[i]=1; 3928 } else { 3929 const PetscInt *ii; 3930 PetscInt n; 3931 PetscBool flg_row; 3932 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3933 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3934 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3935 } 3936 for (i=n_vertices;i<total_counts_cc;i++) { 3937 if (PetscBTLookup(change_basis,i)) { 3938 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3939 if (PetscBTLookup(qr_needed_idx,i)) { 3940 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3941 } else { 3942 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3943 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3944 } 3945 } 3946 } 3947 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3948 ierr = PetscFree(nnz);CHKERRQ(ierr); 3949 /* Set interior change in the matrix */ 3950 if (!pcbddc->benign_change || pcbddc->fake_change) { 3951 for (i=0;i<pcis->n;i++) { 3952 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3953 } 3954 } else { 3955 const PetscInt *ii,*jj; 3956 PetscScalar *aa; 3957 PetscInt n; 3958 PetscBool flg_row; 3959 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3960 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3961 for (i=0;i<n;i++) { 3962 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3963 } 3964 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3965 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3966 } 3967 3968 if (pcbddc->dbg_flag) { 3969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3971 } 3972 3973 3974 /* Now we loop on the constraints which need a change of basis */ 3975 /* 3976 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3977 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3978 3979 Basic blocks of change of basis matrix T computed by 3980 3981 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3982 3983 | 1 0 ... 0 s_1/S | 3984 | 0 1 ... 0 s_2/S | 3985 | ... | 3986 | 0 ... 1 s_{n-1}/S | 3987 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3988 3989 with S = \sum_{i=1}^n s_i^2 3990 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3991 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3992 3993 - QR decomposition of constraints otherwise 3994 */ 3995 if (qr_needed) { 3996 /* space to store Q */ 3997 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3998 /* first we issue queries for optimal work */ 3999 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4000 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4001 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4002 lqr_work = -1; 4003 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4004 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4005 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4006 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4007 lgqr_work = -1; 4008 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4009 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4011 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4012 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4013 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4014 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4015 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4016 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4017 /* array to store scaling factors for reflectors */ 4018 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4019 /* array to store rhs and solution of triangular solver */ 4020 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4021 /* allocating workspace for check */ 4022 if (pcbddc->dbg_flag) { 4023 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4024 } 4025 } 4026 /* array to store whether a node is primal or not */ 4027 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4028 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4029 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4030 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4031 for (i=0;i<total_primal_vertices;i++) { 4032 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4033 } 4034 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4035 4036 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4037 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4038 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4039 if (PetscBTLookup(change_basis,total_counts)) { 4040 /* get constraint info */ 4041 primal_dofs = constraints_n[total_counts]; 4042 dual_dofs = size_of_constraint-primal_dofs; 4043 4044 if (pcbddc->dbg_flag) { 4045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4046 } 4047 4048 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4049 4050 /* copy quadrature constraints for change of basis check */ 4051 if (pcbddc->dbg_flag) { 4052 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4053 } 4054 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4055 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4056 4057 /* compute QR decomposition of constraints */ 4058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4062 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4063 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4064 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4065 4066 /* explictly compute R^-T */ 4067 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4068 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4069 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4072 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4074 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4075 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4077 4078 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4084 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4085 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4086 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4087 4088 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4089 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4090 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4092 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4095 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4097 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4098 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4099 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4100 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4101 4102 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4103 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4104 /* insert cols for primal dofs */ 4105 for (j=0;j<primal_dofs;j++) { 4106 start_vals = &qr_basis[j*size_of_constraint]; 4107 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4108 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4109 } 4110 /* insert cols for dual dofs */ 4111 for (j=0,k=0;j<dual_dofs;k++) { 4112 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4113 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4114 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4115 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4116 j++; 4117 } 4118 } 4119 4120 /* check change of basis */ 4121 if (pcbddc->dbg_flag) { 4122 PetscInt ii,jj; 4123 PetscBool valid_qr=PETSC_TRUE; 4124 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4129 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4130 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4131 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4132 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4133 for (jj=0;jj<size_of_constraint;jj++) { 4134 for (ii=0;ii<primal_dofs;ii++) { 4135 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4136 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4137 } 4138 } 4139 if (!valid_qr) { 4140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4141 for (jj=0;jj<size_of_constraint;jj++) { 4142 for (ii=0;ii<primal_dofs;ii++) { 4143 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4144 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4145 } 4146 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4147 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4148 } 4149 } 4150 } 4151 } else { 4152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4153 } 4154 } 4155 } else { /* simple transformation block */ 4156 PetscInt row,col; 4157 PetscScalar val,norm; 4158 4159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4160 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4161 for (j=0;j<size_of_constraint;j++) { 4162 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4163 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4164 if (!PetscBTLookup(is_primal,row_B)) { 4165 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4166 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4167 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4168 } else { 4169 for (k=0;k<size_of_constraint;k++) { 4170 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4171 if (row != col) { 4172 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4173 } else { 4174 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4175 } 4176 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4177 } 4178 } 4179 } 4180 if (pcbddc->dbg_flag) { 4181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4182 } 4183 } 4184 } else { 4185 if (pcbddc->dbg_flag) { 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4187 } 4188 } 4189 } 4190 4191 /* free workspace */ 4192 if (qr_needed) { 4193 if (pcbddc->dbg_flag) { 4194 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4195 } 4196 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4197 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4198 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4199 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4200 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4201 } 4202 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4203 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4205 4206 /* assembling of global change of variable */ 4207 if (!pcbddc->fake_change) { 4208 Mat tmat; 4209 PetscInt bs; 4210 4211 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4212 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4213 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4214 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4215 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4216 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4217 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4218 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4219 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4220 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4221 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4222 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4223 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4224 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4225 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4226 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4227 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4228 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4229 4230 /* check */ 4231 if (pcbddc->dbg_flag) { 4232 PetscReal error; 4233 Vec x,x_change; 4234 4235 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4236 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4237 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4238 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4239 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4241 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4242 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4243 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4244 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4245 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4246 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4247 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4249 ierr = VecDestroy(&x);CHKERRQ(ierr); 4250 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4251 } 4252 /* adapt sub_schurs computed (if any) */ 4253 if (pcbddc->use_deluxe_scaling) { 4254 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4255 4256 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4257 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4258 } 4259 if (sub_schurs->S_Ej_all) { 4260 Mat S_new,tmat; 4261 IS is_all_N,is_V_Sall = NULL; 4262 4263 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4264 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4265 if (pcbddc->deluxe_zerorows) { 4266 ISLocalToGlobalMapping NtoSall; 4267 IS is_V; 4268 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4269 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4270 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4271 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4272 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4273 } 4274 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4275 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4276 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4277 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4278 if (pcbddc->deluxe_zerorows) { 4279 const PetscScalar *array; 4280 const PetscInt *idxs_V,*idxs_all; 4281 PetscInt i,n_V; 4282 4283 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4284 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4285 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4286 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4287 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4288 for (i=0;i<n_V;i++) { 4289 PetscScalar val; 4290 PetscInt idx; 4291 4292 idx = idxs_V[i]; 4293 val = array[idxs_all[idxs_V[i]]]; 4294 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4295 } 4296 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4297 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4298 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4299 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4300 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4301 } 4302 sub_schurs->S_Ej_all = S_new; 4303 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4304 if (sub_schurs->sum_S_Ej_all) { 4305 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4306 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4307 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4308 if (pcbddc->deluxe_zerorows) { 4309 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4310 } 4311 sub_schurs->sum_S_Ej_all = S_new; 4312 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4313 } 4314 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4315 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4316 } 4317 /* destroy any change of basis context in sub_schurs */ 4318 if (sub_schurs->change) { 4319 PetscInt i; 4320 4321 for (i=0;i<sub_schurs->n_subs;i++) { 4322 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4323 } 4324 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4325 } 4326 } 4327 if (pcbddc->switch_static) { /* need to save the local change */ 4328 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4329 } else { 4330 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4331 } 4332 /* determine if any process has changed the pressures locally */ 4333 if (pcbddc->benign_saddle_point) { 4334 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4335 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4336 } 4337 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4338 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4339 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4340 pcbddc->use_qr_single = qr_needed; 4341 } 4342 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4343 PetscBool needglobal = PETSC_FALSE; 4344 if (pcbddc->benign_saddle_point) { 4345 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4346 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4347 } 4348 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4349 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4350 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4351 } else { 4352 Mat benign_global = NULL; 4353 if (needglobal) { 4354 Mat tmat; 4355 4356 pcbddc->change_interior = PETSC_TRUE; 4357 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4358 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4359 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4360 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4361 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4362 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4363 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4364 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4365 if (pcbddc->benign_change) { 4366 Mat M; 4367 4368 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4369 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4370 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4371 ierr = MatDestroy(&M);CHKERRQ(ierr); 4372 } else { 4373 Mat eye; 4374 PetscScalar *array; 4375 4376 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4377 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4378 for (i=0;i<pcis->n;i++) { 4379 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4380 } 4381 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4382 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4385 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4386 } 4387 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4388 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4389 } 4390 if (pcbddc->user_ChangeOfBasisMatrix) { 4391 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4392 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4393 } else if (needglobal) { 4394 pcbddc->ChangeOfBasisMatrix = benign_global; 4395 } 4396 } 4397 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4398 IS is_global; 4399 const PetscInt *gidxs; 4400 4401 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4402 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4403 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4404 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4405 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4406 } 4407 } 4408 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4409 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4410 } 4411 4412 if (!pcbddc->fake_change) { 4413 /* add pressure dofs to set of primal nodes for numbering purposes */ 4414 for (i=0;i<pcbddc->benign_n;i++) { 4415 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4416 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4417 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4418 pcbddc->local_primal_size_cc++; 4419 pcbddc->local_primal_size++; 4420 } 4421 4422 /* check if a new primal space has been introduced (also take into account benign trick) */ 4423 pcbddc->new_primal_space_local = PETSC_TRUE; 4424 if (olocal_primal_size == pcbddc->local_primal_size) { 4425 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4426 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4427 if (!pcbddc->new_primal_space_local) { 4428 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4429 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4430 } 4431 } 4432 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4433 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4434 } 4435 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4436 4437 /* flush dbg viewer */ 4438 if (pcbddc->dbg_flag) { 4439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4440 } 4441 4442 /* free workspace */ 4443 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4444 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4445 if (!pcbddc->adaptive_selection) { 4446 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4447 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4448 } else { 4449 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4450 pcbddc->adaptive_constraints_idxs_ptr, 4451 pcbddc->adaptive_constraints_data_ptr, 4452 pcbddc->adaptive_constraints_idxs, 4453 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4454 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4455 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4456 } 4457 PetscFunctionReturn(0); 4458 } 4459 4460 #undef __FUNCT__ 4461 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4462 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4463 { 4464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4465 PC_IS *pcis = (PC_IS*)pc->data; 4466 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4467 PetscInt ierr,i,N; 4468 PetscViewer viewer=pcbddc->dbg_viewer; 4469 4470 PetscFunctionBegin; 4471 /* Reset previously computed graph */ 4472 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4473 /* Init local Graph struct */ 4474 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4475 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4476 4477 /* Check validity of the csr graph passed in by the user */ 4478 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4479 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4480 } 4481 4482 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4483 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4484 PetscInt *xadj,*adjncy; 4485 PetscInt nvtxs; 4486 PetscBool flg_row=PETSC_FALSE; 4487 4488 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4489 if (flg_row) { 4490 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4491 pcbddc->computed_rowadj = PETSC_TRUE; 4492 } 4493 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4494 } 4495 if (pcbddc->dbg_flag) { 4496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4497 } 4498 4499 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4500 if (pcbddc->user_provided_isfordofs) { 4501 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4502 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4503 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4504 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4505 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4506 } 4507 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4508 pcbddc->n_ISForDofs = 0; 4509 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4510 } 4511 } else { 4512 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4513 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4515 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4516 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4517 } 4518 } 4519 } 4520 4521 /* Setup of Graph */ 4522 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4523 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4524 } 4525 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4526 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4527 } 4528 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4529 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4530 } 4531 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4532 4533 /* attach info on disconnected subdomains if present */ 4534 if (pcbddc->n_local_subs) { 4535 PetscInt *local_subs; 4536 4537 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4538 for (i=0;i<pcbddc->n_local_subs;i++) { 4539 const PetscInt *idxs; 4540 PetscInt nl,j; 4541 4542 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4543 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4544 for (j=0;j<nl;j++) { 4545 local_subs[idxs[j]] = i; 4546 } 4547 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4548 } 4549 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4550 pcbddc->mat_graph->local_subs = local_subs; 4551 } 4552 4553 /* Graph's connected components analysis */ 4554 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4555 4556 /* print some info to stdout */ 4557 if (pcbddc->dbg_flag) { 4558 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4559 } 4560 PetscFunctionReturn(0); 4561 } 4562 4563 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "PCBDDCSubsetNumbering" 4566 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4567 { 4568 PetscSF sf; 4569 PetscLayout map; 4570 const PetscInt *idxs; 4571 PetscInt *leaf_data,*root_data,*gidxs; 4572 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4573 PetscInt n_n,nlocals,start,first_index; 4574 PetscMPIInt commsize; 4575 PetscBool first_found; 4576 PetscErrorCode ierr; 4577 4578 PetscFunctionBegin; 4579 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4580 if (subset_mult) { 4581 PetscCheckSameComm(subset,1,subset_mult,2); 4582 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4583 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4584 } 4585 /* create workspace layout for computing global indices of subset */ 4586 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4587 lbounds[0] = lbounds[1] = 0; 4588 for (i=0;i<n;i++) { 4589 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4590 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4591 } 4592 lbounds[0] = -lbounds[0]; 4593 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4594 gbounds[0] = -gbounds[0]; 4595 N = gbounds[1] - gbounds[0] + 1; 4596 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4599 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4600 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4601 4602 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4603 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4604 if (subset_mult) { 4605 const PetscInt* idxs_mult; 4606 4607 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4608 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4609 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4610 } else { 4611 for (i=0;i<n;i++) leaf_data[i] = 1; 4612 } 4613 /* local size of new subset */ 4614 n_n = 0; 4615 for (i=0;i<n;i++) n_n += leaf_data[i]; 4616 4617 /* global indexes in layout */ 4618 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4619 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4620 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4621 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4622 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4623 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4624 4625 /* reduce from leaves to roots */ 4626 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4627 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4628 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4629 4630 /* count indexes in local part of layout */ 4631 nlocals = 0; 4632 first_index = -1; 4633 first_found = PETSC_FALSE; 4634 for (i=0;i<Nl;i++) { 4635 if (!first_found && root_data[i]) { 4636 first_found = PETSC_TRUE; 4637 first_index = i; 4638 } 4639 nlocals += root_data[i]; 4640 } 4641 4642 /* cumulative of number of indexes and size of subset without holes */ 4643 #if defined(PETSC_HAVE_MPI_EXSCAN) 4644 start = 0; 4645 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4646 #else 4647 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4648 start = start-nlocals; 4649 #endif 4650 4651 if (N_n) { /* compute total size of new subset if requested */ 4652 *N_n = start + nlocals; 4653 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4654 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4655 } 4656 4657 /* adapt root data with cumulative */ 4658 if (first_found) { 4659 PetscInt old_index; 4660 4661 root_data[first_index] += start; 4662 old_index = first_index; 4663 for (i=first_index+1;i<Nl;i++) { 4664 if (root_data[i]) { 4665 root_data[i] += root_data[old_index]; 4666 old_index = i; 4667 } 4668 } 4669 } 4670 4671 /* from roots to leaves */ 4672 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4673 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4674 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4675 4676 /* create new IS with global indexes without holes */ 4677 if (subset_mult) { 4678 const PetscInt* idxs_mult; 4679 PetscInt cum; 4680 4681 cum = 0; 4682 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4683 for (i=0;i<n;i++) { 4684 PetscInt j; 4685 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4686 } 4687 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4688 } else { 4689 for (i=0;i<n;i++) { 4690 gidxs[i] = leaf_data[i]-1; 4691 } 4692 } 4693 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4694 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4695 PetscFunctionReturn(0); 4696 } 4697 4698 #undef __FUNCT__ 4699 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4700 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4701 { 4702 PetscInt i,j; 4703 PetscScalar *alphas; 4704 PetscErrorCode ierr; 4705 4706 PetscFunctionBegin; 4707 /* this implements stabilized Gram-Schmidt */ 4708 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4709 for (i=0;i<n;i++) { 4710 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4711 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4712 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4713 } 4714 ierr = PetscFree(alphas);CHKERRQ(ierr); 4715 PetscFunctionReturn(0); 4716 } 4717 4718 #undef __FUNCT__ 4719 #define __FUNCT__ "MatISGetSubassemblingPattern" 4720 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4721 { 4722 Mat A; 4723 PetscInt n_neighs,*neighs,*n_shared,**shared; 4724 PetscMPIInt size,rank,color; 4725 PetscInt *xadj,*adjncy; 4726 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4727 PetscInt im_active,active_procs,n,i,local_size,threshold=0; 4728 PetscInt void_procs,*procs_candidates = NULL; 4729 PetscBool ismatis,use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4730 PetscSubcomm subcomm; 4731 PetscErrorCode ierr; 4732 4733 PetscFunctionBegin; 4734 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4735 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4736 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4737 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4738 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4739 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4740 4741 if (have_void) *have_void = PETSC_FALSE; 4742 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4743 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4744 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4745 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4746 im_active = !!(n); 4747 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4748 void_procs = size - active_procs; 4749 /* get ranks of of non-active processes in mat communicator */ 4750 if (void_procs) { 4751 PetscInt ncand; 4752 4753 if (have_void) *have_void = PETSC_TRUE; 4754 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4755 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4756 for (i=0,ncand=0;i<size;i++) { 4757 if (!procs_candidates[i]) { 4758 procs_candidates[ncand++] = i; 4759 } 4760 } 4761 /* force n_subdomains to be not greater that the number of non-active processes */ 4762 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4763 } 4764 4765 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4766 if (active_procs < *n_subdomains) { 4767 PetscInt issize,isidx; 4768 if (im_active) { 4769 issize = 1; 4770 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4771 isidx = procs_candidates[rank]; 4772 } else { 4773 isidx = rank; 4774 } 4775 } else { 4776 issize = 0; 4777 isidx = -1; 4778 } 4779 *n_subdomains = active_procs; 4780 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4781 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4782 PetscFunctionReturn(0); 4783 } 4784 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4785 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4786 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4787 4788 /* Get info on mapping */ 4789 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4790 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4791 4792 /* build local CSR graph of subdomains' connectivity */ 4793 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4794 xadj[0] = 0; 4795 xadj[1] = PetscMax(n_neighs-1,0); 4796 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4797 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4798 4799 if (threshold) { 4800 PetscInt xadj_count = 0; 4801 for (i=1;i<n_neighs;i++) { 4802 if (n_shared[i] > threshold) { 4803 adjncy[xadj_count] = neighs[i]; 4804 adjncy_wgt[xadj_count] = n_shared[i]; 4805 xadj_count++; 4806 } 4807 } 4808 xadj[1] = xadj_count; 4809 } else { 4810 if (xadj[1]) { 4811 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4812 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4813 } 4814 } 4815 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4816 if (use_square) { 4817 for (i=0;i<xadj[1];i++) { 4818 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4819 } 4820 } 4821 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4822 4823 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4824 4825 /* 4826 Restrict work on active processes only. 4827 */ 4828 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4829 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4830 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4831 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4832 if (color) { 4833 ierr = PetscFree(xadj);CHKERRQ(ierr); 4834 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4835 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4836 } else { 4837 Mat subdomain_adj; 4838 IS new_ranks,new_ranks_contig; 4839 MatPartitioning partitioner; 4840 PetscInt prank,rstart=0,rend=0; 4841 PetscInt *is_indices,*oldranks; 4842 PetscMPIInt size; 4843 PetscBool aggregate; 4844 4845 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4846 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4847 prank = rank; 4848 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4849 /* 4850 for (i=0;i<size;i++) { 4851 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4852 } 4853 */ 4854 for (i=0;i<xadj[1];i++) { 4855 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4856 } 4857 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4858 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4859 if (aggregate) { 4860 PetscInt lrows,row,ncols,*cols; 4861 PetscMPIInt nrank; 4862 PetscScalar *vals; 4863 4864 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4865 lrows = 0; 4866 if (nrank<redprocs) { 4867 lrows = size/redprocs; 4868 if (nrank<size%redprocs) lrows++; 4869 } 4870 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4871 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4872 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4873 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4874 row = nrank; 4875 ncols = xadj[1]-xadj[0]; 4876 cols = adjncy; 4877 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4878 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4879 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4880 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4881 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4882 ierr = PetscFree(xadj);CHKERRQ(ierr); 4883 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4884 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4885 ierr = PetscFree(vals);CHKERRQ(ierr); 4886 } else { 4887 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4888 } 4889 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4890 4891 /* Partition */ 4892 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4893 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4894 if (use_vwgt) { 4895 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4896 v_wgt[0] = local_size; 4897 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4898 } 4899 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4900 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4901 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4902 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4903 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4904 4905 /* renumber new_ranks to avoid "holes" in new set of processors */ 4906 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4907 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4908 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4909 if (!aggregate) { 4910 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4911 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4912 } else { 4913 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4914 } 4915 } else { 4916 PetscInt idxs[1]; 4917 PetscMPIInt tag; 4918 MPI_Request *reqs; 4919 4920 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4921 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4922 for (i=rstart;i<rend;i++) { 4923 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4924 } 4925 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4926 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4927 ierr = PetscFree(reqs);CHKERRQ(ierr); 4928 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4929 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4930 } else { 4931 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4932 } 4933 } 4934 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4935 /* clean up */ 4936 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4937 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4938 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4939 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4940 } 4941 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4942 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4943 4944 /* assemble parallel IS for sends */ 4945 i = 1; 4946 if (color) i=0; 4947 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 4948 PetscFunctionReturn(0); 4949 } 4950 4951 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4952 4953 #undef __FUNCT__ 4954 #define __FUNCT__ "MatISSubassemble" 4955 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4956 { 4957 Mat local_mat; 4958 IS is_sends_internal; 4959 PetscInt rows,cols,new_local_rows; 4960 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4961 PetscBool ismatis,isdense,newisdense,destroy_mat; 4962 ISLocalToGlobalMapping l2gmap; 4963 PetscInt* l2gmap_indices; 4964 const PetscInt* is_indices; 4965 MatType new_local_type; 4966 /* buffers */ 4967 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4968 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4969 PetscInt *recv_buffer_idxs_local; 4970 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4971 /* MPI */ 4972 MPI_Comm comm,comm_n; 4973 PetscSubcomm subcomm; 4974 PetscMPIInt n_sends,n_recvs,commsize; 4975 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4976 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4977 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4978 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4979 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4980 PetscErrorCode ierr; 4981 4982 PetscFunctionBegin; 4983 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4984 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4985 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4986 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4987 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4988 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 4989 PetscValidLogicalCollectiveBool(mat,reuse,6); 4990 PetscValidLogicalCollectiveInt(mat,nis,8); 4991 4992 /* further checks */ 4993 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4994 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4995 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4996 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4997 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4998 if (reuse && *mat_n) { 4999 PetscInt mrows,mcols,mnrows,mncols; 5000 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5001 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5002 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5003 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5004 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5005 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5006 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5007 } 5008 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5009 PetscValidLogicalCollectiveInt(mat,bs,0); 5010 5011 /* prepare IS for sending if not provided */ 5012 if (!is_sends) { 5013 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5014 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5015 } else { 5016 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5017 is_sends_internal = is_sends; 5018 } 5019 5020 /* get comm */ 5021 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5022 5023 /* compute number of sends */ 5024 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5025 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5026 5027 /* compute number of receives */ 5028 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5029 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5030 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5031 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5032 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5033 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5034 ierr = PetscFree(iflags);CHKERRQ(ierr); 5035 5036 /* restrict comm if requested */ 5037 subcomm = 0; 5038 destroy_mat = PETSC_FALSE; 5039 if (restrict_comm) { 5040 PetscMPIInt color,subcommsize; 5041 5042 color = 0; 5043 if (restrict_full) { 5044 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5045 } else { 5046 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5047 } 5048 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5049 subcommsize = commsize - subcommsize; 5050 /* check if reuse has been requested */ 5051 if (reuse) { 5052 if (*mat_n) { 5053 PetscMPIInt subcommsize2; 5054 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5055 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5056 comm_n = PetscObjectComm((PetscObject)*mat_n); 5057 } else { 5058 comm_n = PETSC_COMM_SELF; 5059 } 5060 } else { /* MAT_INITIAL_MATRIX */ 5061 PetscMPIInt rank; 5062 5063 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5064 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5065 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5066 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5067 comm_n = PetscSubcommChild(subcomm); 5068 } 5069 /* flag to destroy *mat_n if not significative */ 5070 if (color) destroy_mat = PETSC_TRUE; 5071 } else { 5072 comm_n = comm; 5073 } 5074 5075 /* prepare send/receive buffers */ 5076 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5077 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5078 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5079 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5080 if (nis) { 5081 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5082 } 5083 5084 /* Get data from local matrices */ 5085 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5086 /* TODO: See below some guidelines on how to prepare the local buffers */ 5087 /* 5088 send_buffer_vals should contain the raw values of the local matrix 5089 send_buffer_idxs should contain: 5090 - MatType_PRIVATE type 5091 - PetscInt size_of_l2gmap 5092 - PetscInt global_row_indices[size_of_l2gmap] 5093 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5094 */ 5095 else { 5096 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5097 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5098 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5099 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5100 send_buffer_idxs[1] = i; 5101 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5102 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5103 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5104 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5105 for (i=0;i<n_sends;i++) { 5106 ilengths_vals[is_indices[i]] = len*len; 5107 ilengths_idxs[is_indices[i]] = len+2; 5108 } 5109 } 5110 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5111 /* additional is (if any) */ 5112 if (nis) { 5113 PetscMPIInt psum; 5114 PetscInt j; 5115 for (j=0,psum=0;j<nis;j++) { 5116 PetscInt plen; 5117 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5118 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5119 psum += len+1; /* indices + lenght */ 5120 } 5121 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5122 for (j=0,psum=0;j<nis;j++) { 5123 PetscInt plen; 5124 const PetscInt *is_array_idxs; 5125 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5126 send_buffer_idxs_is[psum] = plen; 5127 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5128 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5129 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5130 psum += plen+1; /* indices + lenght */ 5131 } 5132 for (i=0;i<n_sends;i++) { 5133 ilengths_idxs_is[is_indices[i]] = psum; 5134 } 5135 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5136 } 5137 5138 buf_size_idxs = 0; 5139 buf_size_vals = 0; 5140 buf_size_idxs_is = 0; 5141 for (i=0;i<n_recvs;i++) { 5142 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5143 buf_size_vals += (PetscInt)olengths_vals[i]; 5144 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5145 } 5146 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5147 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5148 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5149 5150 /* get new tags for clean communications */ 5151 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5152 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5153 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5154 5155 /* allocate for requests */ 5156 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5157 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5158 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5159 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5160 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5161 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5162 5163 /* communications */ 5164 ptr_idxs = recv_buffer_idxs; 5165 ptr_vals = recv_buffer_vals; 5166 ptr_idxs_is = recv_buffer_idxs_is; 5167 for (i=0;i<n_recvs;i++) { 5168 source_dest = onodes[i]; 5169 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5170 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5171 ptr_idxs += olengths_idxs[i]; 5172 ptr_vals += olengths_vals[i]; 5173 if (nis) { 5174 source_dest = onodes_is[i]; 5175 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5176 ptr_idxs_is += olengths_idxs_is[i]; 5177 } 5178 } 5179 for (i=0;i<n_sends;i++) { 5180 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5181 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5182 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5183 if (nis) { 5184 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5185 } 5186 } 5187 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5188 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5189 5190 /* assemble new l2g map */ 5191 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5192 ptr_idxs = recv_buffer_idxs; 5193 new_local_rows = 0; 5194 for (i=0;i<n_recvs;i++) { 5195 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5196 ptr_idxs += olengths_idxs[i]; 5197 } 5198 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5199 ptr_idxs = recv_buffer_idxs; 5200 new_local_rows = 0; 5201 for (i=0;i<n_recvs;i++) { 5202 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5203 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5204 ptr_idxs += olengths_idxs[i]; 5205 } 5206 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5207 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5208 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5209 5210 /* infer new local matrix type from received local matrices type */ 5211 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5212 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5213 if (n_recvs) { 5214 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5215 ptr_idxs = recv_buffer_idxs; 5216 for (i=0;i<n_recvs;i++) { 5217 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5218 new_local_type_private = MATAIJ_PRIVATE; 5219 break; 5220 } 5221 ptr_idxs += olengths_idxs[i]; 5222 } 5223 switch (new_local_type_private) { 5224 case MATDENSE_PRIVATE: 5225 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5226 new_local_type = MATSEQAIJ; 5227 bs = 1; 5228 } else { /* if I receive only 1 dense matrix */ 5229 new_local_type = MATSEQDENSE; 5230 bs = 1; 5231 } 5232 break; 5233 case MATAIJ_PRIVATE: 5234 new_local_type = MATSEQAIJ; 5235 bs = 1; 5236 break; 5237 case MATBAIJ_PRIVATE: 5238 new_local_type = MATSEQBAIJ; 5239 break; 5240 case MATSBAIJ_PRIVATE: 5241 new_local_type = MATSEQSBAIJ; 5242 break; 5243 default: 5244 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5245 break; 5246 } 5247 } else { /* by default, new_local_type is seqdense */ 5248 new_local_type = MATSEQDENSE; 5249 bs = 1; 5250 } 5251 5252 /* create MATIS object if needed */ 5253 if (!reuse) { 5254 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5255 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5256 } else { 5257 /* it also destroys the local matrices */ 5258 if (*mat_n) { 5259 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5260 } else { /* this is a fake object */ 5261 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5262 } 5263 } 5264 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5265 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5266 5267 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5268 5269 /* Global to local map of received indices */ 5270 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5271 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5272 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5273 5274 /* restore attributes -> type of incoming data and its size */ 5275 buf_size_idxs = 0; 5276 for (i=0;i<n_recvs;i++) { 5277 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5278 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5279 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5280 } 5281 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5282 5283 /* set preallocation */ 5284 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5285 if (!newisdense) { 5286 PetscInt *new_local_nnz=0; 5287 5288 ptr_vals = recv_buffer_vals; 5289 ptr_idxs = recv_buffer_idxs_local; 5290 if (n_recvs) { 5291 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5292 } 5293 for (i=0;i<n_recvs;i++) { 5294 PetscInt j; 5295 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5296 for (j=0;j<*(ptr_idxs+1);j++) { 5297 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5298 } 5299 } else { 5300 /* TODO */ 5301 } 5302 ptr_idxs += olengths_idxs[i]; 5303 } 5304 if (new_local_nnz) { 5305 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5306 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5307 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5308 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5309 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5310 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5311 } else { 5312 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5313 } 5314 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5315 } else { 5316 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5317 } 5318 5319 /* set values */ 5320 ptr_vals = recv_buffer_vals; 5321 ptr_idxs = recv_buffer_idxs_local; 5322 for (i=0;i<n_recvs;i++) { 5323 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5324 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5325 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5326 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5327 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5328 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5329 } else { 5330 /* TODO */ 5331 } 5332 ptr_idxs += olengths_idxs[i]; 5333 ptr_vals += olengths_vals[i]; 5334 } 5335 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5336 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5337 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5338 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5339 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5340 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5341 5342 #if 0 5343 if (!restrict_comm) { /* check */ 5344 Vec lvec,rvec; 5345 PetscReal infty_error; 5346 5347 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5348 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5349 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5350 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5351 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5352 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5353 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5354 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5355 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5356 } 5357 #endif 5358 5359 /* assemble new additional is (if any) */ 5360 if (nis) { 5361 PetscInt **temp_idxs,*count_is,j,psum; 5362 5363 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5364 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5365 ptr_idxs = recv_buffer_idxs_is; 5366 psum = 0; 5367 for (i=0;i<n_recvs;i++) { 5368 for (j=0;j<nis;j++) { 5369 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5370 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5371 psum += plen; 5372 ptr_idxs += plen+1; /* shift pointer to received data */ 5373 } 5374 } 5375 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5376 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5377 for (i=1;i<nis;i++) { 5378 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5379 } 5380 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5381 ptr_idxs = recv_buffer_idxs_is; 5382 for (i=0;i<n_recvs;i++) { 5383 for (j=0;j<nis;j++) { 5384 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5385 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5386 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5387 ptr_idxs += plen+1; /* shift pointer to received data */ 5388 } 5389 } 5390 for (i=0;i<nis;i++) { 5391 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5392 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5393 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5394 } 5395 ierr = PetscFree(count_is);CHKERRQ(ierr); 5396 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5397 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5398 } 5399 /* free workspace */ 5400 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5401 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5402 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5403 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5404 if (isdense) { 5405 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5406 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5407 } else { 5408 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5409 } 5410 if (nis) { 5411 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5412 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5413 } 5414 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5415 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5416 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5417 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5418 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5419 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5420 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5421 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5422 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5423 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5424 ierr = PetscFree(onodes);CHKERRQ(ierr); 5425 if (nis) { 5426 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5427 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5428 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5429 } 5430 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5431 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5432 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5433 for (i=0;i<nis;i++) { 5434 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5435 } 5436 *mat_n = NULL; 5437 } 5438 PetscFunctionReturn(0); 5439 } 5440 5441 /* temporary hack into ksp private data structure */ 5442 #include <petsc/private/kspimpl.h> 5443 5444 #undef __FUNCT__ 5445 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5446 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5447 { 5448 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5449 PC_IS *pcis = (PC_IS*)pc->data; 5450 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5451 MatNullSpace CoarseNullSpace=NULL; 5452 ISLocalToGlobalMapping coarse_islg; 5453 IS coarse_is,*isarray; 5454 PetscInt i,im_active=-1,active_procs=-1; 5455 PetscInt nis,nisdofs,nisneu,nisvert; 5456 PC pc_temp; 5457 PCType coarse_pc_type; 5458 KSPType coarse_ksp_type; 5459 PetscBool multilevel_requested,multilevel_allowed; 5460 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5461 Mat t_coarse_mat_is; 5462 PetscInt ncoarse; 5463 PetscBool compute_vecs = PETSC_FALSE; 5464 PetscScalar *array; 5465 MatReuse coarse_mat_reuse; 5466 PetscBool restr, full_restr, have_void; 5467 PetscErrorCode ierr; 5468 5469 PetscFunctionBegin; 5470 /* Assign global numbering to coarse dofs */ 5471 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5472 PetscInt ocoarse_size; 5473 compute_vecs = PETSC_TRUE; 5474 ocoarse_size = pcbddc->coarse_size; 5475 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5476 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5477 /* see if we can avoid some work */ 5478 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5479 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5480 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5481 PC pc; 5482 PetscBool isbddc; 5483 5484 /* temporary workaround since PCBDDC does not have a reset method so far */ 5485 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5486 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5487 if (isbddc) { 5488 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5489 } else { 5490 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5491 } 5492 coarse_reuse = PETSC_FALSE; 5493 } else { /* we can safely reuse already computed coarse matrix */ 5494 coarse_reuse = PETSC_TRUE; 5495 } 5496 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5497 coarse_reuse = PETSC_FALSE; 5498 } 5499 /* reset any subassembling information */ 5500 if (!coarse_reuse || pcbddc->recompute_topography) { 5501 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5502 } 5503 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5504 coarse_reuse = PETSC_TRUE; 5505 } 5506 /* assemble coarse matrix */ 5507 if (coarse_reuse && pcbddc->coarse_ksp) { 5508 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5509 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5510 coarse_mat_reuse = MAT_REUSE_MATRIX; 5511 } else { 5512 coarse_mat = NULL; 5513 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5514 } 5515 5516 /* creates temporary l2gmap and IS for coarse indexes */ 5517 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5518 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5519 5520 /* creates temporary MATIS object for coarse matrix */ 5521 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5522 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5523 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5524 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5525 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5526 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5527 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5528 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5529 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5530 5531 /* count "active" (i.e. with positive local size) and "void" processes */ 5532 im_active = !!(pcis->n); 5533 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5534 5535 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5536 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5537 /* full_restr : just use the receivers from the subassembling pattern */ 5538 coarse_mat_is = NULL; 5539 multilevel_allowed = PETSC_FALSE; 5540 multilevel_requested = PETSC_FALSE; 5541 full_restr = PETSC_TRUE; 5542 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5543 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5544 if (multilevel_requested) { 5545 ncoarse = active_procs/pcbddc->coarsening_ratio; 5546 restr = PETSC_FALSE; 5547 full_restr = PETSC_FALSE; 5548 } else { 5549 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5550 restr = PETSC_TRUE; 5551 full_restr = PETSC_TRUE; 5552 } 5553 ncoarse = PetscMax(1,ncoarse); 5554 if (!pcbddc->coarse_subassembling) { 5555 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5556 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5557 PetscInt psum; 5558 PetscMPIInt size; 5559 if (pcbddc->coarse_ksp) psum = 1; 5560 else psum = 0; 5561 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5563 if (ncoarse < size) have_void = PETSC_TRUE; 5564 } 5565 /* determine if we can go multilevel */ 5566 if (multilevel_requested) { 5567 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5568 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5569 } 5570 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5571 5572 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5573 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5574 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5575 const PetscInt *idxs; 5576 ISLocalToGlobalMapping tmap; 5577 5578 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5579 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5580 /* allocate space for temporary storage */ 5581 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5582 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5583 /* allocate for IS array */ 5584 nisdofs = pcbddc->n_ISForDofsLocal; 5585 nisneu = !!pcbddc->NeumannBoundariesLocal; 5586 nisvert = 0; 5587 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5588 nisvert = 1; 5589 } 5590 nis = nisdofs + nisneu + nisvert; 5591 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5592 /* dofs splitting */ 5593 for (i=0;i<nisdofs;i++) { 5594 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5595 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5596 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5597 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5598 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5599 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5600 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5601 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5602 } 5603 /* neumann boundaries */ 5604 if (pcbddc->NeumannBoundariesLocal) { 5605 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5606 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5607 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5608 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5609 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5610 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5611 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5612 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5613 } 5614 /* primal vertices (benign) */ 5615 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5616 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5617 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5618 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5619 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5620 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5621 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5622 /* ierr = ISView(isarray[nis-1],0);CHKERRQ(ierr); */ 5623 } 5624 /* free memory */ 5625 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5626 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5627 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5628 } else { 5629 nis = 0; 5630 nisdofs = 0; 5631 nisneu = 0; 5632 nisvert = 0; 5633 isarray = NULL; 5634 } 5635 /* destroy no longer needed map */ 5636 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5637 5638 /* subassemble */ 5639 if (multilevel_allowed) { 5640 PetscBool reuse,reuser; 5641 if (coarse_mat) reuse = PETSC_TRUE; 5642 else reuse = PETSC_FALSE; 5643 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5644 if (reuser) { 5645 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5646 } else { 5647 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5648 } 5649 } else { 5650 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5651 } 5652 if (coarse_mat_is || coarse_mat) { 5653 PetscMPIInt size; 5654 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5655 if (!multilevel_allowed) { 5656 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5657 } else { 5658 Mat A; 5659 5660 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5661 if (coarse_mat_is) { 5662 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5663 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5664 coarse_mat = coarse_mat_is; 5665 } 5666 /* be sure we don't have MatSeqDENSE as local mat */ 5667 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5668 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5669 } 5670 } 5671 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5672 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5673 5674 /* create local to global scatters for coarse problem */ 5675 if (compute_vecs) { 5676 PetscInt lrows; 5677 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5678 if (coarse_mat) { 5679 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5680 } else { 5681 lrows = 0; 5682 } 5683 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5684 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5685 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5686 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5687 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5688 } 5689 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5690 5691 /* set defaults for coarse KSP and PC */ 5692 if (multilevel_allowed) { 5693 coarse_ksp_type = KSPRICHARDSON; 5694 coarse_pc_type = PCBDDC; 5695 } else { 5696 coarse_ksp_type = KSPPREONLY; 5697 coarse_pc_type = PCREDUNDANT; 5698 } 5699 5700 /* print some info if requested */ 5701 if (pcbddc->dbg_flag) { 5702 if (!multilevel_allowed) { 5703 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5704 if (multilevel_requested) { 5705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5706 } else if (pcbddc->max_levels) { 5707 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5708 } 5709 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5710 } 5711 } 5712 5713 /* create the coarse KSP object only once with defaults */ 5714 if (coarse_mat) { 5715 PetscViewer dbg_viewer = NULL; 5716 if (pcbddc->dbg_flag) { 5717 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5718 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5719 } 5720 if (!pcbddc->coarse_ksp) { 5721 char prefix[256],str_level[16]; 5722 size_t len; 5723 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5724 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5725 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5726 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5727 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5728 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5729 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5730 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5731 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5732 /* prefix */ 5733 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5734 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5735 if (!pcbddc->current_level) { 5736 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5737 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5738 } else { 5739 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5740 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5741 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5742 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5743 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5744 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5745 } 5746 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5747 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5748 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5749 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5750 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5751 /* allow user customization */ 5752 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5753 } 5754 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5755 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5756 if (nisdofs) { 5757 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5758 for (i=0;i<nisdofs;i++) { 5759 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5760 } 5761 } 5762 if (nisneu) { 5763 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5764 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5765 } 5766 if (nisvert) { 5767 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5768 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5769 } 5770 5771 /* get some info after set from options */ 5772 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5773 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5774 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5775 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5776 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5777 isbddc = PETSC_FALSE; 5778 } 5779 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5780 if (isredundant) { 5781 KSP inner_ksp; 5782 PC inner_pc; 5783 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5784 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5785 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5786 } 5787 5788 /* parameters which miss an API */ 5789 if (isbddc) { 5790 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5791 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5792 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5793 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5794 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5795 if (pcbddc_coarse->benign_compute_nonetflux) { 5796 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5797 } 5798 } 5799 5800 /* propagate symmetry info of coarse matrix */ 5801 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5802 if (pc->pmat->symmetric_set) { 5803 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5804 } 5805 if (pc->pmat->hermitian_set) { 5806 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5807 } 5808 if (pc->pmat->spd_set) { 5809 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5810 } 5811 /* set operators */ 5812 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5813 if (pcbddc->dbg_flag) { 5814 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5815 } 5816 } 5817 ierr = PetscFree(isarray);CHKERRQ(ierr); 5818 #if 0 5819 { 5820 PetscViewer viewer; 5821 char filename[256]; 5822 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5823 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5824 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5825 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5826 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5827 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5828 } 5829 #endif 5830 5831 /* Compute coarse null space (special handling by BDDC only) */ 5832 #if 0 5833 if (pcbddc->NullSpace) { 5834 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5835 } 5836 #endif 5837 /* hack */ 5838 if (pcbddc->coarse_ksp) { 5839 Vec crhs,csol; 5840 5841 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5842 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5843 if (!csol) { 5844 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5845 } 5846 if (!crhs) { 5847 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5848 } 5849 } 5850 5851 /* compute null space for coarse solver if the benign trick has been requested */ 5852 if (pcbddc->benign_null) { 5853 5854 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5855 for (i=0;i<pcbddc->benign_n;i++) { 5856 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5857 } 5858 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5859 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5860 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5861 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5862 if (coarse_mat) { 5863 Vec nullv; 5864 PetscScalar *array,*array2; 5865 PetscInt nl; 5866 5867 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5868 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5869 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5870 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5871 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5872 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5873 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5874 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5875 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5876 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5877 } 5878 } 5879 5880 if (pcbddc->coarse_ksp) { 5881 PetscBool ispreonly; 5882 5883 if (CoarseNullSpace) { 5884 PetscBool isnull; 5885 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5886 if (0) { 5887 if (isbddc && !pcbddc->benign_saddle_point) { 5888 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5889 } else { 5890 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5891 } 5892 } else { 5893 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5894 } 5895 } 5896 /* setup coarse ksp */ 5897 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5898 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5899 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5900 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5901 KSP check_ksp; 5902 KSPType check_ksp_type; 5903 PC check_pc; 5904 Vec check_vec,coarse_vec; 5905 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5906 PetscInt its; 5907 PetscBool compute_eigs; 5908 PetscReal *eigs_r,*eigs_c; 5909 PetscInt neigs; 5910 const char *prefix; 5911 5912 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5913 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5914 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5915 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5916 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5917 if (ispreonly) { 5918 check_ksp_type = KSPPREONLY; 5919 compute_eigs = PETSC_FALSE; 5920 } else { 5921 check_ksp_type = KSPGMRES; 5922 compute_eigs = PETSC_TRUE; 5923 } 5924 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5925 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5926 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5927 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5928 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5929 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5930 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5931 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5932 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5933 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5934 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5935 /* create random vec */ 5936 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5937 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5938 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5939 if (CoarseNullSpace) { 5940 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5941 } 5942 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5943 /* solve coarse problem */ 5944 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5945 if (CoarseNullSpace) { 5946 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5947 } 5948 /* set eigenvalue estimation if preonly has not been requested */ 5949 if (compute_eigs) { 5950 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5951 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5952 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5953 lambda_max = eigs_r[neigs-1]; 5954 lambda_min = eigs_r[0]; 5955 if (pcbddc->use_coarse_estimates) { 5956 if (lambda_max>lambda_min) { 5957 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5958 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5959 } 5960 } 5961 } 5962 5963 /* check coarse problem residual error */ 5964 if (pcbddc->dbg_flag) { 5965 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5966 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5967 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5968 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5969 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5970 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5971 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5972 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5973 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5974 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5975 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5976 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5977 if (CoarseNullSpace) { 5978 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5979 } 5980 if (compute_eigs) { 5981 PetscReal lambda_max_s,lambda_min_s; 5982 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5983 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5984 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5985 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5986 for (i=0;i<neigs;i++) { 5987 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5988 } 5989 } 5990 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5992 } 5993 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5994 if (compute_eigs) { 5995 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5996 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5997 } 5998 } 5999 } 6000 /* print additional info */ 6001 if (pcbddc->dbg_flag) { 6002 /* waits until all processes reaches this point */ 6003 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6006 } 6007 6008 /* free memory */ 6009 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6010 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6011 PetscFunctionReturn(0); 6012 } 6013 6014 #undef __FUNCT__ 6015 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6016 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6017 { 6018 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6019 PC_IS* pcis = (PC_IS*)pc->data; 6020 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6021 IS subset,subset_mult,subset_n; 6022 PetscInt local_size,coarse_size=0; 6023 PetscInt *local_primal_indices=NULL; 6024 const PetscInt *t_local_primal_indices; 6025 PetscErrorCode ierr; 6026 6027 PetscFunctionBegin; 6028 /* Compute global number of coarse dofs */ 6029 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6030 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6031 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6032 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6033 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6034 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6035 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6036 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6037 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6038 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6039 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6040 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6041 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6042 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6043 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6044 6045 /* check numbering */ 6046 if (pcbddc->dbg_flag) { 6047 PetscScalar coarsesum,*array,*array2; 6048 PetscInt i; 6049 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6050 6051 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6054 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6055 /* counter */ 6056 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6057 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6058 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6059 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6060 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6061 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6062 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6063 for (i=0;i<pcbddc->local_primal_size;i++) { 6064 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6065 } 6066 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6067 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6068 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6071 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6072 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6073 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6074 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6075 for (i=0;i<pcis->n;i++) { 6076 if (array[i] != 0.0 && array[i] != array2[i]) { 6077 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6078 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6079 set_error = PETSC_TRUE; 6080 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6082 } 6083 } 6084 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6085 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6086 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6087 for (i=0;i<pcis->n;i++) { 6088 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6089 } 6090 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6091 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6092 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6093 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6094 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6096 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6097 PetscInt *gidxs; 6098 6099 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6100 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6102 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6103 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6104 for (i=0;i<pcbddc->local_primal_size;i++) { 6105 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6106 } 6107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6108 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6109 } 6110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6111 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6112 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6113 } 6114 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6115 /* get back data */ 6116 *coarse_size_n = coarse_size; 6117 *local_primal_indices_n = local_primal_indices; 6118 PetscFunctionReturn(0); 6119 } 6120 6121 #undef __FUNCT__ 6122 #define __FUNCT__ "PCBDDCGlobalToLocal" 6123 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6124 { 6125 IS localis_t; 6126 PetscInt i,lsize,*idxs,n; 6127 PetscScalar *vals; 6128 PetscErrorCode ierr; 6129 6130 PetscFunctionBegin; 6131 /* get indices in local ordering exploiting local to global map */ 6132 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6133 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6134 for (i=0;i<lsize;i++) vals[i] = 1.0; 6135 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6136 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6137 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6138 if (idxs) { /* multilevel guard */ 6139 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6140 } 6141 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6142 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6143 ierr = PetscFree(vals);CHKERRQ(ierr); 6144 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6145 /* now compute set in local ordering */ 6146 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6147 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6148 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6149 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6150 for (i=0,lsize=0;i<n;i++) { 6151 if (PetscRealPart(vals[i]) > 0.5) { 6152 lsize++; 6153 } 6154 } 6155 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6156 for (i=0,lsize=0;i<n;i++) { 6157 if (PetscRealPart(vals[i]) > 0.5) { 6158 idxs[lsize++] = i; 6159 } 6160 } 6161 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6163 *localis = localis_t; 6164 PetscFunctionReturn(0); 6165 } 6166 6167 #undef __FUNCT__ 6168 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6169 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6170 { 6171 PC_IS *pcis=(PC_IS*)pc->data; 6172 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6173 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6174 Mat S_j; 6175 PetscInt *used_xadj,*used_adjncy; 6176 PetscBool free_used_adj; 6177 PetscErrorCode ierr; 6178 6179 PetscFunctionBegin; 6180 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6181 free_used_adj = PETSC_FALSE; 6182 if (pcbddc->sub_schurs_layers == -1) { 6183 used_xadj = NULL; 6184 used_adjncy = NULL; 6185 } else { 6186 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6187 used_xadj = pcbddc->mat_graph->xadj; 6188 used_adjncy = pcbddc->mat_graph->adjncy; 6189 } else if (pcbddc->computed_rowadj) { 6190 used_xadj = pcbddc->mat_graph->xadj; 6191 used_adjncy = pcbddc->mat_graph->adjncy; 6192 } else { 6193 PetscBool flg_row=PETSC_FALSE; 6194 const PetscInt *xadj,*adjncy; 6195 PetscInt nvtxs; 6196 6197 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6198 if (flg_row) { 6199 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6200 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6201 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6202 free_used_adj = PETSC_TRUE; 6203 } else { 6204 pcbddc->sub_schurs_layers = -1; 6205 used_xadj = NULL; 6206 used_adjncy = NULL; 6207 } 6208 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6209 } 6210 } 6211 6212 /* setup sub_schurs data */ 6213 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6214 if (!sub_schurs->schur_explicit) { 6215 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6216 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6217 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6218 } else { 6219 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6220 PetscBool isseqaij,need_change = PETSC_FALSE;; 6221 PetscInt benign_n; 6222 Mat change = NULL; 6223 Vec scaling = NULL; 6224 IS change_primal = NULL; 6225 6226 if (!pcbddc->use_vertices && reuse_solvers) { 6227 PetscInt n_vertices; 6228 6229 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6230 reuse_solvers = (PetscBool)!n_vertices; 6231 } 6232 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6233 if (!isseqaij) { 6234 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6235 if (matis->A == pcbddc->local_mat) { 6236 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6237 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6238 } else { 6239 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6240 } 6241 } 6242 if (!pcbddc->benign_change_explicit) { 6243 benign_n = pcbddc->benign_n; 6244 } else { 6245 benign_n = 0; 6246 } 6247 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6248 We need a global reduction to avoid possible deadlocks. 6249 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6250 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6251 PetscBool have_loc_change = !!(sub_schurs->change); 6252 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6253 need_change = !need_change; 6254 } 6255 /* If the user defines additional constraints, we import them here. 6256 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6257 if (need_change) { 6258 PC_IS *pcisf; 6259 PC_BDDC *pcbddcf; 6260 PC pcf; 6261 6262 if (pcbddc->sub_schurs_rebuild) { 6263 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6264 } 6265 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6266 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6267 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6268 /* hacks */ 6269 pcisf = (PC_IS*)pcf->data; 6270 pcisf->is_B_local = pcis->is_B_local; 6271 pcisf->vec1_N = pcis->vec1_N; 6272 pcisf->BtoNmap = pcis->BtoNmap; 6273 pcisf->n = pcis->n; 6274 pcisf->n_B = pcis->n_B; 6275 pcbddcf = (PC_BDDC*)pcf->data; 6276 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6277 pcbddcf->mat_graph = pcbddc->mat_graph; 6278 pcbddcf->use_faces = PETSC_TRUE; 6279 pcbddcf->use_change_of_basis = PETSC_TRUE; 6280 pcbddcf->use_change_on_faces = PETSC_TRUE; 6281 pcbddcf->use_qr_single = PETSC_TRUE; 6282 pcbddcf->fake_change = PETSC_TRUE; 6283 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6284 /* store information on primal vertices and change of basis (in local numbering) */ 6285 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6286 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6287 change = pcbddcf->ConstraintMatrix; 6288 pcbddcf->ConstraintMatrix = NULL; 6289 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6290 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6291 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6292 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6293 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6294 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6295 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6296 pcf->ops->destroy = NULL; 6297 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6298 } 6299 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6300 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6301 ierr = MatDestroy(&change);CHKERRQ(ierr); 6302 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6303 } 6304 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6305 6306 /* free adjacency */ 6307 if (free_used_adj) { 6308 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6309 } 6310 PetscFunctionReturn(0); 6311 } 6312 6313 #undef __FUNCT__ 6314 #define __FUNCT__ "PCBDDCInitSubSchurs" 6315 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6316 { 6317 PC_IS *pcis=(PC_IS*)pc->data; 6318 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6319 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6320 PCBDDCGraph graph; 6321 PetscErrorCode ierr; 6322 6323 PetscFunctionBegin; 6324 /* attach interface graph for determining subsets */ 6325 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6326 IS verticesIS,verticescomm; 6327 PetscInt vsize,*idxs; 6328 6329 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6330 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6331 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6332 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6333 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6334 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6335 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6336 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6337 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6338 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6339 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6340 /* 6341 if (pcbddc->dbg_flag) { 6342 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6343 } 6344 */ 6345 } else { 6346 graph = pcbddc->mat_graph; 6347 } 6348 6349 /* sub_schurs init */ 6350 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6351 6352 /* free graph struct */ 6353 if (pcbddc->sub_schurs_rebuild) { 6354 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6355 } 6356 PetscFunctionReturn(0); 6357 } 6358 6359 #undef __FUNCT__ 6360 #define __FUNCT__ "PCBDDCCheckOperator" 6361 PetscErrorCode PCBDDCCheckOperator(PC pc) 6362 { 6363 PC_IS *pcis=(PC_IS*)pc->data; 6364 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6365 PetscErrorCode ierr; 6366 6367 PetscFunctionBegin; 6368 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6369 IS zerodiag = NULL; 6370 Mat S_j,B0_B=NULL; 6371 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6372 PetscScalar *p0_check,*array,*array2; 6373 PetscReal norm; 6374 PetscInt i; 6375 6376 /* B0 and B0_B */ 6377 if (zerodiag) { 6378 IS dummy; 6379 6380 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6381 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6382 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6383 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6384 } 6385 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6386 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6387 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6388 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6389 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6390 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6391 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6392 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6393 /* S_j */ 6394 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6395 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6396 6397 /* mimic vector in \widetilde{W}_\Gamma */ 6398 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6399 /* continuous in primal space */ 6400 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6401 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6402 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6403 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6404 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6405 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6406 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6407 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6408 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6409 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6410 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6411 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6412 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6413 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6414 6415 /* assemble rhs for coarse problem */ 6416 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6417 /* local with Schur */ 6418 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6419 if (zerodiag) { 6420 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6421 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6422 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6423 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6424 } 6425 /* sum on primal nodes the local contributions */ 6426 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6427 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6428 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6429 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6430 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6431 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6432 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6433 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6434 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6435 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6436 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6437 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6438 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6439 /* scale primal nodes (BDDC sums contibutions) */ 6440 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6441 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6442 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6443 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6444 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6445 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6446 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6447 /* global: \widetilde{B0}_B w_\Gamma */ 6448 if (zerodiag) { 6449 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6450 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6451 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6452 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6453 } 6454 /* BDDC */ 6455 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6456 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6457 6458 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6459 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6460 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6461 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6462 for (i=0;i<pcbddc->benign_n;i++) { 6463 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6464 } 6465 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6466 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6467 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6468 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6469 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6470 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6471 } 6472 PetscFunctionReturn(0); 6473 } 6474