1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 recompute_zerodiag = PETSC_FALSE; 587 has_null_pressures = PETSC_FALSE; 588 have_null = PETSC_FALSE; 589 } 590 591 /* final check for null pressures */ 592 if (zerodiag && pressures) { 593 PetscInt nz,np; 594 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 595 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 596 if (nz != np) have_null = PETSC_FALSE; 597 } 598 599 if (recompute_zerodiag) { 600 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 601 if (pcbddc->benign_n == 1) { 602 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 603 zerodiag = zerodiag_subs[0]; 604 } else { 605 PetscInt i,nzn,*new_idxs; 606 607 nzn = 0; 608 for (i=0;i<pcbddc->benign_n;i++) { 609 PetscInt ns; 610 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 611 nzn += ns; 612 } 613 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns,*idxs; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 620 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 621 nzn += ns; 622 } 623 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 625 } 626 have_null = PETSC_FALSE; 627 } 628 629 /* no-net-flux */ 630 if (pcbddc->benign_compute_nonetflux) { 631 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 632 MatNullSpace near_null_space; 633 ISLocalToGlobalMapping rmap; 634 Vec quad_vec; 635 PetscScalar *pvals; 636 PetscInt i,np,*dummyins; 637 638 /* create vector to hold quadrature weights */ 639 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 640 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 641 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 642 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 643 644 /* compute B^{(i)T} * 1_p */ 645 np = 0; 646 if (zerodiag) { 647 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 648 } else if (pressures) { 649 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 650 } 651 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 652 for (i=0;i<np;i++) pvals[i] = 1.; 653 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 654 if (np) { 655 const PetscInt *pidxs; 656 657 if (zerodiag) { 658 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 659 } else if (pressures) { 660 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 661 } 662 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 663 if (zerodiag) { 664 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 } 669 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 670 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 672 ierr = PetscFree(pvals);CHKERRQ(ierr); 673 674 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 675 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 676 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 677 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 678 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 679 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 680 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 681 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 682 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 683 for (i=0;i<matis->sf_nleaves;i++) { 684 if (PetscGlobalRank != matis->sf_leafdata[i]) { 685 pvals[i] = 0.; 686 } 687 } 688 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 689 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 690 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 692 ierr = PetscFree(dummyins);CHKERRQ(ierr); 693 694 /* assembly quadrature vec and attach near null space to pmat */ 695 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 696 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 697 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 699 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 700 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 701 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 702 } 703 704 /* change of basis and p0 dofs */ 705 if (has_null_pressures) { 706 IS zerodiagc; 707 const PetscInt *idxs,*idxsc; 708 PetscInt i,s,*nnz; 709 710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 713 /* local change of basis for pressures */ 714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 720 for (i=0;i<pcbddc->benign_n;i++) { 721 PetscInt nzs,j; 722 723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 728 } 729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 730 ierr = PetscFree(nnz);CHKERRQ(ierr); 731 /* set identity on velocities */ 732 for (i=0;i<n-nz;i++) { 733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 734 } 735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 739 /* set change on pressures */ 740 for (s=0;s<pcbddc->benign_n;s++) { 741 PetscScalar *array; 742 PetscInt nzs; 743 744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 746 for (i=0;i<nzs-1;i++) { 747 PetscScalar vals[2]; 748 PetscInt cols[2]; 749 750 cols[0] = idxs[i]; 751 cols[1] = idxs[nzs-1]; 752 vals[0] = 1.; 753 vals[1] = 1.; 754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 } 756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 757 for (i=0;i<nzs-1;i++) array[i] = -1.; 758 array[nzs-1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 760 /* store local idxs for p0 */ 761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 763 ierr = PetscFree(array);CHKERRQ(ierr); 764 } 765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* project if needed */ 768 if (pcbddc->benign_change_explicit) { 769 Mat M; 770 771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 774 ierr = MatDestroy(&M);CHKERRQ(ierr); 775 } 776 /* store global idxs for p0 */ 777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 778 } 779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 781 782 /* determines if the coarse solver will be singular or not */ 783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 784 *zerodiaglocal = zerodiag; 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 791 { 792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (!pcbddc->benign_sf) { 797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 798 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 799 } 800 if (get) { /* use SF to get values */ 801 PetscScalar *array; 802 803 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 804 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 805 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 807 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 808 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 817 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 818 { 819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 /* TODO: add error checking 824 - avoid nested pop (or push) calls. 825 - cannot push before pop. 826 - cannot call this if pcbddc->local_mat is NULL 827 */ 828 if (!pcbddc->benign_n) { 829 PetscFunctionReturn(0); 830 } 831 if (pop) { 832 if (pcbddc->benign_change_explicit) { 833 IS is_p0; 834 MatReuse reuse; 835 836 /* extract B_0 */ 837 reuse = MAT_INITIAL_MATRIX; 838 if (pcbddc->benign_B0) { 839 reuse = MAT_REUSE_MATRIX; 840 } 841 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 843 /* remove rows and cols from local problem */ 844 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 845 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 846 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 848 } else { 849 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 850 PetscScalar *vals; 851 PetscInt i,n,*idxs_ins; 852 853 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 854 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 855 if (!pcbddc->benign_B0) { 856 PetscInt *nnz; 857 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 858 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 859 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 860 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 861 for (i=0;i<pcbddc->benign_n;i++) { 862 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 863 nnz[i] = n - nnz[i]; 864 } 865 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 866 ierr = PetscFree(nnz);CHKERRQ(ierr); 867 } 868 869 for (i=0;i<pcbddc->benign_n;i++) { 870 PetscScalar *array; 871 PetscInt *idxs,j,nz,cum; 872 873 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 875 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 876 for (j=0;j<nz;j++) vals[j] = 1.; 877 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 878 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 879 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 880 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 881 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 883 cum = 0; 884 for (j=0;j<n;j++) { 885 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 886 vals[cum] = array[j]; 887 idxs_ins[cum] = j; 888 cum++; 889 } 890 } 891 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 892 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 898 } 899 } else { /* push */ 900 if (pcbddc->benign_change_explicit) { 901 PetscInt i; 902 903 for (i=0;i<pcbddc->benign_n;i++) { 904 PetscScalar *B0_vals; 905 PetscInt *B0_cols,B0_ncol; 906 907 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 908 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 909 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 911 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 } else { 916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "PCBDDCAdaptiveSelection" 924 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 925 { 926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 927 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 928 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 929 PetscBLASInt *B_iwork,*B_ifail; 930 PetscScalar *work,lwork; 931 PetscScalar *St,*S,*eigv; 932 PetscScalar *Sarray,*Starray; 933 PetscReal *eigs,thresh; 934 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 935 PetscBool allocated_S_St; 936 #if defined(PETSC_USE_COMPLEX) 937 PetscReal *rwork; 938 #endif 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 943 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 944 945 if (pcbddc->dbg_flag) { 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 949 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 } 951 952 if (pcbddc->dbg_flag) { 953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 954 } 955 956 /* max size of subsets */ 957 mss = 0; 958 for (i=0;i<sub_schurs->n_subs;i++) { 959 PetscInt subset_size; 960 961 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 962 mss = PetscMax(mss,subset_size); 963 } 964 965 /* min/max and threshold */ 966 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 967 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 968 nmax = PetscMax(nmin,nmax); 969 allocated_S_St = PETSC_FALSE; 970 if (nmin) { 971 allocated_S_St = PETSC_TRUE; 972 } 973 974 /* allocate lapack workspace */ 975 cum = cum2 = 0; 976 maxneigs = 0; 977 for (i=0;i<sub_schurs->n_subs;i++) { 978 PetscInt n,subset_size; 979 980 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 981 n = PetscMin(subset_size,nmax); 982 cum += subset_size; 983 cum2 += subset_size*n; 984 maxneigs = PetscMax(maxneigs,n); 985 } 986 if (mss) { 987 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 988 PetscBLASInt B_itype = 1; 989 PetscBLASInt B_N = mss; 990 PetscReal zero = 0.0; 991 PetscReal eps = 0.0; /* dlamch? */ 992 993 B_lwork = -1; 994 S = NULL; 995 St = NULL; 996 eigs = NULL; 997 eigv = NULL; 998 B_iwork = NULL; 999 B_ifail = NULL; 1000 #if defined(PETSC_USE_COMPLEX) 1001 rwork = NULL; 1002 #endif 1003 thresh = 1.0; 1004 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1005 #if defined(PETSC_USE_COMPLEX) 1006 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1007 #else 1008 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1009 #endif 1010 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1012 } else { 1013 /* TODO */ 1014 } 1015 } else { 1016 lwork = 0; 1017 } 1018 1019 nv = 0; 1020 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1021 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1022 } 1023 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1024 if (allocated_S_St) { 1025 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1026 } 1027 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1030 #endif 1031 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1032 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1033 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1034 nv+cum,&pcbddc->adaptive_constraints_idxs, 1035 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1036 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1037 1038 maxneigs = 0; 1039 cum = cumarray = 0; 1040 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1041 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1042 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1043 const PetscInt *idxs; 1044 1045 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1046 for (cum=0;cum<nv;cum++) { 1047 pcbddc->adaptive_constraints_n[cum] = 1; 1048 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1049 pcbddc->adaptive_constraints_data[cum] = 1.0; 1050 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1051 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1052 } 1053 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1054 } 1055 1056 if (mss) { /* multilevel */ 1057 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1058 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1059 } 1060 1061 thresh = pcbddc->adaptive_threshold; 1062 for (i=0;i<sub_schurs->n_subs;i++) { 1063 const PetscInt *idxs; 1064 PetscReal upper,lower; 1065 PetscInt j,subset_size,eigs_start = 0; 1066 PetscBLASInt B_N; 1067 PetscBool same_data = PETSC_FALSE; 1068 1069 if (pcbddc->use_deluxe_scaling) { 1070 upper = PETSC_MAX_REAL; 1071 lower = thresh; 1072 } else { 1073 upper = 1./thresh; 1074 lower = 0.; 1075 } 1076 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1077 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1078 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1079 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1080 if (sub_schurs->is_hermitian) { 1081 PetscInt j,k; 1082 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1083 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1084 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1085 } 1086 for (j=0;j<subset_size;j++) { 1087 for (k=j;k<subset_size;k++) { 1088 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1089 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1090 } 1091 } 1092 } else { 1093 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1095 } 1096 } else { 1097 S = Sarray + cumarray; 1098 St = Starray + cumarray; 1099 } 1100 /* see if we can save some work */ 1101 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1102 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1103 } 1104 1105 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1106 B_neigs = 0; 1107 } else { 1108 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1109 PetscBLASInt B_itype = 1; 1110 PetscBLASInt B_IL, B_IU; 1111 PetscReal eps = -1.0; /* dlamch? */ 1112 PetscInt nmin_s; 1113 PetscBool compute_range = PETSC_FALSE; 1114 1115 if (pcbddc->dbg_flag) { 1116 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1117 } 1118 1119 compute_range = PETSC_FALSE; 1120 if (thresh > 1.+PETSC_SMALL && !same_data) { 1121 compute_range = PETSC_TRUE; 1122 } 1123 1124 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1125 if (compute_range) { 1126 1127 /* ask for eigenvalues larger than thresh */ 1128 #if defined(PETSC_USE_COMPLEX) 1129 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1130 #else 1131 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1132 #endif 1133 } else if (!same_data) { 1134 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1135 B_IL = 1; 1136 #if defined(PETSC_USE_COMPLEX) 1137 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1138 #else 1139 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1140 #endif 1141 } else { /* same_data is true, so get the adaptive function requested by the user */ 1142 PetscInt k; 1143 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1144 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1145 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1146 nmin = nmax; 1147 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1148 for (k=0;k<nmax;k++) { 1149 eigs[k] = 1./PETSC_SMALL; 1150 eigv[k*(subset_size+1)] = 1.0; 1151 } 1152 } 1153 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1154 if (B_ierr) { 1155 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1156 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1157 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1158 } 1159 1160 if (B_neigs > nmax) { 1161 if (pcbddc->dbg_flag) { 1162 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1163 } 1164 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1165 B_neigs = nmax; 1166 } 1167 1168 nmin_s = PetscMin(nmin,B_N); 1169 if (B_neigs < nmin_s) { 1170 PetscBLASInt B_neigs2; 1171 1172 if (pcbddc->use_deluxe_scaling) { 1173 B_IL = B_N - nmin_s + 1; 1174 B_IU = B_N - B_neigs; 1175 } else { 1176 B_IL = B_neigs + 1; 1177 B_IU = nmin_s; 1178 } 1179 if (pcbddc->dbg_flag) { 1180 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1181 } 1182 if (sub_schurs->is_hermitian) { 1183 PetscInt j; 1184 for (j=0;j<subset_size;j++) { 1185 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1186 } 1187 for (j=0;j<subset_size;j++) { 1188 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1189 } 1190 } else { 1191 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1192 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1193 } 1194 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1195 #if defined(PETSC_USE_COMPLEX) 1196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1197 #else 1198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1199 #endif 1200 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1201 B_neigs += B_neigs2; 1202 } 1203 if (B_ierr) { 1204 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1205 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1206 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1207 } 1208 if (pcbddc->dbg_flag) { 1209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1210 for (j=0;j<B_neigs;j++) { 1211 if (eigs[j] == 0.0) { 1212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1213 } else { 1214 if (pcbddc->use_deluxe_scaling) { 1215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1216 } else { 1217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1218 } 1219 } 1220 } 1221 } 1222 } else { 1223 /* TODO */ 1224 } 1225 } 1226 /* change the basis back to the original one */ 1227 if (sub_schurs->change) { 1228 Mat change,phi,phit; 1229 1230 if (pcbddc->dbg_flag > 1) { 1231 PetscInt ii; 1232 for (ii=0;ii<B_neigs;ii++) { 1233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1234 for (j=0;j<B_N;j++) { 1235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1236 } 1237 } 1238 } 1239 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1240 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1241 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1242 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1243 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1244 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1245 } 1246 maxneigs = PetscMax(B_neigs,maxneigs); 1247 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1248 if (B_neigs) { 1249 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1250 1251 if (pcbddc->dbg_flag > 1) { 1252 PetscInt ii; 1253 for (ii=0;ii<B_neigs;ii++) { 1254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1255 for (j=0;j<B_N;j++) { 1256 #if defined(PETSC_USE_COMPLEX) 1257 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1258 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1260 #else 1261 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1262 #endif 1263 } 1264 } 1265 } 1266 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1267 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1268 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1269 cum++; 1270 } 1271 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1272 /* shift for next computation */ 1273 cumarray += subset_size*subset_size; 1274 } 1275 if (pcbddc->dbg_flag) { 1276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1277 } 1278 1279 if (mss) { 1280 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1281 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1282 /* destroy matrices (junk) */ 1283 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1284 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1285 } 1286 if (allocated_S_St) { 1287 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1288 } 1289 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1290 #if defined(PETSC_USE_COMPLEX) 1291 ierr = PetscFree(rwork);CHKERRQ(ierr); 1292 #endif 1293 if (pcbddc->dbg_flag) { 1294 PetscInt maxneigs_r; 1295 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCSetUpSolvers" 1303 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1304 { 1305 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1306 PetscScalar *coarse_submat_vals; 1307 PetscErrorCode ierr; 1308 1309 PetscFunctionBegin; 1310 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1311 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1312 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1313 1314 /* Setup local neumann solver ksp_R */ 1315 /* PCBDDCSetUpLocalScatters should be called first! */ 1316 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1317 1318 /* Change global null space passed in by the user if change of basis has been requested */ 1319 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1320 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1321 } 1322 1323 /* 1324 Setup local correction and local part of coarse basis. 1325 Gives back the dense local part of the coarse matrix in column major ordering 1326 */ 1327 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1328 1329 /* Compute total number of coarse nodes and setup coarse solver */ 1330 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1331 1332 /* free */ 1333 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "PCBDDCResetCustomization" 1339 PetscErrorCode PCBDDCResetCustomization(PC pc) 1340 { 1341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1346 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1348 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1349 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1350 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1351 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1352 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1353 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1354 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1355 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1356 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "PCBDDCResetTopography" 1362 PetscErrorCode PCBDDCResetTopography(PC pc) 1363 { 1364 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1365 PetscInt i; 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1370 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1371 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1372 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1373 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1374 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1375 for (i=0;i<pcbddc->n_local_subs;i++) { 1376 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1377 } 1378 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1379 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "PCBDDCResetSolvers" 1385 PetscErrorCode PCBDDCResetSolvers(PC pc) 1386 { 1387 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1392 if (pcbddc->coarse_phi_B) { 1393 PetscScalar *array; 1394 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1395 ierr = PetscFree(array);CHKERRQ(ierr); 1396 } 1397 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1398 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1404 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1405 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1406 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1407 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1408 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1409 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1410 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1411 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1412 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1413 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1415 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1416 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1417 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1418 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1419 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1420 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1421 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1422 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1423 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1424 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1425 if (pcbddc->benign_zerodiag_subs) { 1426 PetscInt i; 1427 for (i=0;i<pcbddc->benign_n;i++) { 1428 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1438 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1439 { 1440 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1441 PC_IS *pcis = (PC_IS*)pc->data; 1442 VecType impVecType; 1443 PetscInt n_constraints,n_R,old_size; 1444 PetscErrorCode ierr; 1445 1446 PetscFunctionBegin; 1447 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1448 /* get sizes */ 1449 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1450 n_R = pcis->n - pcbddc->n_vertices; 1451 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1452 /* local work vectors (try to avoid unneeded work)*/ 1453 /* R nodes */ 1454 old_size = -1; 1455 if (pcbddc->vec1_R) { 1456 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1457 } 1458 if (n_R != old_size) { 1459 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1460 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1461 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1462 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1463 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1464 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1465 } 1466 /* local primal dofs */ 1467 old_size = -1; 1468 if (pcbddc->vec1_P) { 1469 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1470 } 1471 if (pcbddc->local_primal_size != old_size) { 1472 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1473 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1474 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1475 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1476 } 1477 /* local explicit constraints */ 1478 old_size = -1; 1479 if (pcbddc->vec1_C) { 1480 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1481 } 1482 if (n_constraints && n_constraints != old_size) { 1483 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1484 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1485 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1486 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "PCBDDCSetUpCorrection" 1493 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1494 { 1495 PetscErrorCode ierr; 1496 /* pointers to pcis and pcbddc */ 1497 PC_IS* pcis = (PC_IS*)pc->data; 1498 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1499 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1500 /* submatrices of local problem */ 1501 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1502 /* submatrices of local coarse problem */ 1503 Mat S_VV,S_CV,S_VC,S_CC; 1504 /* working matrices */ 1505 Mat C_CR; 1506 /* additional working stuff */ 1507 PC pc_R; 1508 Mat F; 1509 Vec dummy_vec; 1510 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1511 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1512 PetscScalar *work; 1513 PetscInt *idx_V_B; 1514 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1515 PetscInt i,n_R,n_D,n_B; 1516 1517 /* some shortcuts to scalars */ 1518 PetscScalar one=1.0,m_one=-1.0; 1519 1520 PetscFunctionBegin; 1521 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1522 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1523 } 1524 1525 /* Set Non-overlapping dimensions */ 1526 n_vertices = pcbddc->n_vertices; 1527 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1528 n_B = pcis->n_B; 1529 n_D = pcis->n - n_B; 1530 n_R = pcis->n - n_vertices; 1531 1532 /* vertices in boundary numbering */ 1533 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1534 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1535 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1536 1537 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1538 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1540 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 1548 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1549 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1553 lda_rhs = n_R; 1554 need_benign_correction = PETSC_FALSE; 1555 if (isLU || isILU || isCHOL) { 1556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1557 } else if (sub_schurs->reuse_solver) { 1558 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1559 MatFactorType type; 1560 1561 F = reuse_solver->F; 1562 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1563 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1564 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1565 need_benign_correction = !!reuse_solver->benign_n; 1566 } else { 1567 F = NULL; 1568 } 1569 1570 /* allocate workspace */ 1571 n = 0; 1572 if (n_constraints) { 1573 n += lda_rhs*n_constraints; 1574 } 1575 if (n_vertices) { 1576 n = PetscMax(2*lda_rhs*n_vertices,n); 1577 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1578 } 1579 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1580 1581 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1582 dummy_vec = NULL; 1583 if (need_benign_correction && lda_rhs != n_R && F) { 1584 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1585 } 1586 1587 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1588 if (n_constraints) { 1589 Mat M1,M2,M3,C_B; 1590 IS is_aux; 1591 PetscScalar *array,*array2; 1592 1593 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1595 1596 /* Extract constraints on R nodes: C_{CR} */ 1597 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1598 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1599 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1600 1601 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1602 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1603 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1604 for (i=0;i<n_constraints;i++) { 1605 const PetscScalar *row_cmat_values; 1606 const PetscInt *row_cmat_indices; 1607 PetscInt size_of_constraint,j; 1608 1609 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1610 for (j=0;j<size_of_constraint;j++) { 1611 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1612 } 1613 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1614 } 1615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1616 if (F) { 1617 Mat B; 1618 1619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1620 if (need_benign_correction) { 1621 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1622 1623 /* rhs is already zero on interior dofs, no need to change the rhs */ 1624 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1625 } 1626 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1627 if (need_benign_correction) { 1628 PetscScalar *marr; 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1632 if (lda_rhs != n_R) { 1633 for (i=0;i<n_constraints;i++) { 1634 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1635 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1636 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1637 } 1638 } else { 1639 for (i=0;i<n_constraints;i++) { 1640 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1641 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 } 1644 } 1645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1646 } 1647 ierr = MatDestroy(&B);CHKERRQ(ierr); 1648 } else { 1649 PetscScalar *marr; 1650 1651 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1652 for (i=0;i<n_constraints;i++) { 1653 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1655 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1656 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 } 1661 if (!pcbddc->switch_static) { 1662 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1663 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1664 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1665 for (i=0;i<n_constraints;i++) { 1666 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1667 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1668 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1671 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1672 } 1673 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1674 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1675 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1676 } else { 1677 if (lda_rhs != n_R) { 1678 IS dummy; 1679 1680 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1681 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1682 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1683 } else { 1684 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1685 pcbddc->local_auxmat2 = local_auxmat2_R; 1686 } 1687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1688 } 1689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1694 if (isCHOL) { 1695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1696 } else { 1697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1698 } 1699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1705 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1706 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1709 } 1710 1711 /* Get submatrices from subdomain matrix */ 1712 if (n_vertices) { 1713 IS is_aux; 1714 1715 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1716 IS tis; 1717 1718 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1719 ierr = ISSort(tis);CHKERRQ(ierr); 1720 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1721 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1722 } else { 1723 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1724 } 1725 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1727 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1728 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1729 } 1730 1731 /* Matrix of coarse basis functions (local) */ 1732 if (pcbddc->coarse_phi_B) { 1733 PetscInt on_B,on_primal,on_D=n_D; 1734 if (pcbddc->coarse_phi_D) { 1735 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1736 } 1737 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1738 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1739 PetscScalar *marray; 1740 1741 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1742 ierr = PetscFree(marray);CHKERRQ(ierr); 1743 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1744 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1745 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1746 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1747 } 1748 } 1749 1750 if (!pcbddc->coarse_phi_B) { 1751 PetscScalar *marray; 1752 1753 n = n_B*pcbddc->local_primal_size; 1754 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1755 n += n_D*pcbddc->local_primal_size; 1756 } 1757 if (!pcbddc->symmetric_primal) { 1758 n *= 2; 1759 } 1760 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1762 n = n_B*pcbddc->local_primal_size; 1763 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1765 n += n_D*pcbddc->local_primal_size; 1766 } 1767 if (!pcbddc->symmetric_primal) { 1768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1770 n = n_B*pcbddc->local_primal_size; 1771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1772 } 1773 } else { 1774 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1775 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1776 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1777 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1778 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1779 } 1780 } 1781 } 1782 1783 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1784 p0_lidx_I = NULL; 1785 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1786 const PetscInt *idxs; 1787 1788 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1789 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1790 for (i=0;i<pcbddc->benign_n;i++) { 1791 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1792 } 1793 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 } 1795 1796 /* vertices */ 1797 if (n_vertices) { 1798 1799 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1800 1801 if (n_R) { 1802 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1803 PetscBLASInt B_N,B_one = 1; 1804 PetscScalar *x,*y; 1805 PetscBool isseqaij; 1806 1807 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1808 if (need_benign_correction) { 1809 ISLocalToGlobalMapping RtoN; 1810 IS is_p0; 1811 PetscInt *idxs_p0,n; 1812 1813 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1814 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1815 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1816 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1817 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1818 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1819 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1820 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1821 } 1822 1823 if (lda_rhs == n_R) { 1824 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1825 } else { 1826 PetscScalar *av,*array; 1827 const PetscInt *xadj,*adjncy; 1828 PetscInt n; 1829 PetscBool flg_row; 1830 1831 array = work+lda_rhs*n_vertices; 1832 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1833 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1834 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1835 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1836 for (i=0;i<n;i++) { 1837 PetscInt j; 1838 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1839 } 1840 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1841 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1843 } 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1845 if (need_benign_correction) { 1846 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1847 PetscScalar *marr; 1848 1849 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1850 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1851 1852 | 0 0 0 | (V) 1853 L = | 0 0 -1 | (P-p0) 1854 | 0 0 -1 | (p0) 1855 1856 */ 1857 for (i=0;i<reuse_solver->benign_n;i++) { 1858 const PetscScalar *vals; 1859 const PetscInt *idxs,*idxs_zero; 1860 PetscInt n,j,nz; 1861 1862 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1863 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1864 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1865 for (j=0;j<n;j++) { 1866 PetscScalar val = vals[j]; 1867 PetscInt k,col = idxs[j]; 1868 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1869 } 1870 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1871 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1872 } 1873 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1874 } 1875 if (F) { 1876 /* need to correct the rhs */ 1877 if (need_benign_correction) { 1878 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1879 PetscScalar *marr; 1880 1881 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1882 if (lda_rhs != n_R) { 1883 for (i=0;i<n_vertices;i++) { 1884 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1885 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1886 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1887 } 1888 } else { 1889 for (i=0;i<n_vertices;i++) { 1890 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1891 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1893 } 1894 } 1895 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1896 } 1897 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1898 /* need to correct the solution */ 1899 if (need_benign_correction) { 1900 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1901 PetscScalar *marr; 1902 1903 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1904 if (lda_rhs != n_R) { 1905 for (i=0;i<n_vertices;i++) { 1906 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1907 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1908 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1909 } 1910 } else { 1911 for (i=0;i<n_vertices;i++) { 1912 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1913 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1915 } 1916 } 1917 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1918 } 1919 } else { 1920 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1921 for (i=0;i<n_vertices;i++) { 1922 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1923 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1924 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1925 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1926 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1927 } 1928 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1929 } 1930 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1931 /* S_VV and S_CV */ 1932 if (n_constraints) { 1933 Mat B; 1934 1935 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1936 for (i=0;i<n_vertices;i++) { 1937 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1938 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1939 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1940 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1941 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1942 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1943 } 1944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1945 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1946 ierr = MatDestroy(&B);CHKERRQ(ierr); 1947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1948 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1949 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1951 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1952 ierr = MatDestroy(&B);CHKERRQ(ierr); 1953 } 1954 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1955 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1956 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1957 } 1958 if (lda_rhs != n_R) { 1959 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1960 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1961 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1962 } 1963 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1964 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1965 if (need_benign_correction) { 1966 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1967 PetscScalar *marr,*sums; 1968 1969 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1970 ierr = MatDenseGetArray(S_VVt,&marr); 1971 for (i=0;i<reuse_solver->benign_n;i++) { 1972 const PetscScalar *vals; 1973 const PetscInt *idxs,*idxs_zero; 1974 PetscInt n,j,nz; 1975 1976 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1977 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1978 for (j=0;j<n_vertices;j++) { 1979 PetscInt k; 1980 sums[j] = 0.; 1981 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1982 } 1983 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1984 for (j=0;j<n;j++) { 1985 PetscScalar val = vals[j]; 1986 PetscInt k; 1987 for (k=0;k<n_vertices;k++) { 1988 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1989 } 1990 } 1991 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1992 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1993 } 1994 ierr = PetscFree(sums);CHKERRQ(ierr); 1995 ierr = MatDenseRestoreArray(S_VVt,&marr); 1996 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1997 } 1998 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1999 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2001 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2002 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2003 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2005 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2006 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2007 } else { 2008 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2009 } 2010 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2011 2012 /* coarse basis functions */ 2013 for (i=0;i<n_vertices;i++) { 2014 PetscScalar *y; 2015 2016 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2017 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2018 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2019 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2021 y[n_B*i+idx_V_B[i]] = 1.0; 2022 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2023 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2024 2025 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2026 PetscInt j; 2027 2028 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2029 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2030 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2033 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2034 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2035 } 2036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2037 } 2038 /* if n_R == 0 the object is not destroyed */ 2039 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2040 } 2041 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2042 2043 if (n_constraints) { 2044 Mat B; 2045 2046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2047 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2048 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2049 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2050 if (n_vertices) { 2051 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2052 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2053 } else { 2054 Mat S_VCt; 2055 2056 if (lda_rhs != n_R) { 2057 ierr = MatDestroy(&B);CHKERRQ(ierr); 2058 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2059 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2060 } 2061 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2062 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2063 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2064 } 2065 } 2066 ierr = MatDestroy(&B);CHKERRQ(ierr); 2067 /* coarse basis functions */ 2068 for (i=0;i<n_constraints;i++) { 2069 PetscScalar *y; 2070 2071 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2072 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2073 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2074 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2075 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2076 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2077 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2078 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2079 PetscInt j; 2080 2081 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2082 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2083 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2086 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2087 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2088 } 2089 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2090 } 2091 } 2092 if (n_constraints) { 2093 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2094 } 2095 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2096 2097 /* coarse matrix entries relative to B_0 */ 2098 if (pcbddc->benign_n) { 2099 Mat B0_B,B0_BPHI; 2100 IS is_dummy; 2101 PetscScalar *data; 2102 PetscInt j; 2103 2104 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2105 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2106 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2107 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2108 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2109 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2110 for (j=0;j<pcbddc->benign_n;j++) { 2111 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2112 for (i=0;i<pcbddc->local_primal_size;i++) { 2113 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2114 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2115 } 2116 } 2117 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2118 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2119 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2120 } 2121 2122 /* compute other basis functions for non-symmetric problems */ 2123 if (!pcbddc->symmetric_primal) { 2124 Mat B_V=NULL,B_C=NULL; 2125 PetscScalar *marray; 2126 2127 if (n_constraints) { 2128 Mat S_CCT,C_CRT; 2129 2130 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2131 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2132 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2133 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2134 if (n_vertices) { 2135 Mat S_VCT; 2136 2137 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2138 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2139 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2140 } 2141 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2142 } 2143 if (n_vertices && n_R) { 2144 PetscScalar *av,*marray; 2145 const PetscInt *xadj,*adjncy; 2146 PetscInt n; 2147 PetscBool flg_row; 2148 2149 /* B_V = B_V - A_VR^T */ 2150 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2151 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2152 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2153 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2154 for (i=0;i<n;i++) { 2155 PetscInt j; 2156 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2157 } 2158 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2159 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2161 } 2162 2163 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2164 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2165 for (i=0;i<n_vertices;i++) { 2166 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2167 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2168 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2169 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2170 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2171 } 2172 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2173 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2174 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2175 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2176 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2177 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2179 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2180 } 2181 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2182 /* coarse basis functions */ 2183 for (i=0;i<pcbddc->local_primal_size;i++) { 2184 PetscScalar *y; 2185 2186 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2187 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2188 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2189 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2190 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2191 if (i<n_vertices) { 2192 y[n_B*i+idx_V_B[i]] = 1.0; 2193 } 2194 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2195 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2196 2197 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2198 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2199 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2200 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2201 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2202 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2203 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2204 } 2205 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2206 } 2207 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2208 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2209 } 2210 /* free memory */ 2211 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2212 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2213 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2214 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2215 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2216 ierr = PetscFree(work);CHKERRQ(ierr); 2217 if (n_vertices) { 2218 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2219 } 2220 if (n_constraints) { 2221 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2222 } 2223 /* Checking coarse_sub_mat and coarse basis functios */ 2224 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2225 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2226 if (pcbddc->dbg_flag) { 2227 Mat coarse_sub_mat; 2228 Mat AUXMAT,TM1,TM2,TM3,TM4; 2229 Mat coarse_phi_D,coarse_phi_B; 2230 Mat coarse_psi_D,coarse_psi_B; 2231 Mat A_II,A_BB,A_IB,A_BI; 2232 Mat C_B,CPHI; 2233 IS is_dummy; 2234 Vec mones; 2235 MatType checkmattype=MATSEQAIJ; 2236 PetscReal real_value; 2237 2238 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2239 Mat A; 2240 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2241 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2242 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2243 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2244 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2245 ierr = MatDestroy(&A);CHKERRQ(ierr); 2246 } else { 2247 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2248 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2249 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2250 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2251 } 2252 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2253 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2254 if (!pcbddc->symmetric_primal) { 2255 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2256 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2257 } 2258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2259 2260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2262 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2263 if (!pcbddc->symmetric_primal) { 2264 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2265 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2266 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2267 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2268 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2269 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2271 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2272 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2274 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2275 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2276 } else { 2277 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2278 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2279 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2280 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2281 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2283 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2284 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2285 } 2286 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2287 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2288 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2289 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2290 if (pcbddc->benign_n) { 2291 Mat B0_B,B0_BPHI; 2292 PetscScalar *data,*data2; 2293 PetscInt j; 2294 2295 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2296 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2297 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2298 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2299 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2300 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2301 for (j=0;j<pcbddc->benign_n;j++) { 2302 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2303 for (i=0;i<pcbddc->local_primal_size;i++) { 2304 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2305 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2306 } 2307 } 2308 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2309 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2310 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2311 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2313 } 2314 #if 0 2315 { 2316 PetscViewer viewer; 2317 char filename[256]; 2318 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2319 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2320 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2321 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2322 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2323 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2324 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2325 if (save_change) { 2326 Mat phi_B; 2327 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2328 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2329 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2330 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2331 } else { 2332 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2333 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2334 } 2335 if (pcbddc->coarse_phi_D) { 2336 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2337 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2338 } 2339 if (pcbddc->coarse_psi_B) { 2340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2342 } 2343 if (pcbddc->coarse_psi_D) { 2344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2346 } 2347 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2348 } 2349 #endif 2350 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2351 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2352 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2354 2355 /* check constraints */ 2356 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2357 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2358 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2359 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2360 } else { 2361 PetscScalar *data; 2362 Mat tmat; 2363 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2364 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2365 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2366 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2367 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2368 } 2369 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2370 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2371 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2372 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2374 if (!pcbddc->symmetric_primal) { 2375 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2376 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2377 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2378 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2380 } 2381 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2382 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2383 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2384 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2385 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2386 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2387 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2388 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2389 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2390 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2391 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2392 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2393 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2394 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2395 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2396 if (!pcbddc->symmetric_primal) { 2397 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2398 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2399 } 2400 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2401 } 2402 /* get back data */ 2403 *coarse_submat_vals_n = coarse_submat_vals; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2409 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2410 { 2411 Mat *work_mat; 2412 IS isrow_s,iscol_s; 2413 PetscBool rsorted,csorted; 2414 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2415 PetscErrorCode ierr; 2416 2417 PetscFunctionBegin; 2418 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2419 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2420 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2421 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2422 2423 if (!rsorted) { 2424 const PetscInt *idxs; 2425 PetscInt *idxs_sorted,i; 2426 2427 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2428 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2429 for (i=0;i<rsize;i++) { 2430 idxs_perm_r[i] = i; 2431 } 2432 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2433 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2434 for (i=0;i<rsize;i++) { 2435 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2436 } 2437 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2438 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2439 } else { 2440 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2441 isrow_s = isrow; 2442 } 2443 2444 if (!csorted) { 2445 if (isrow == iscol) { 2446 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2447 iscol_s = isrow_s; 2448 } else { 2449 const PetscInt *idxs; 2450 PetscInt *idxs_sorted,i; 2451 2452 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2453 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2454 for (i=0;i<csize;i++) { 2455 idxs_perm_c[i] = i; 2456 } 2457 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2458 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2461 } 2462 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2463 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2464 } 2465 } else { 2466 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2467 iscol_s = iscol; 2468 } 2469 2470 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2471 2472 if (!rsorted || !csorted) { 2473 Mat new_mat; 2474 IS is_perm_r,is_perm_c; 2475 2476 if (!rsorted) { 2477 PetscInt *idxs_r,i; 2478 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2479 for (i=0;i<rsize;i++) { 2480 idxs_r[idxs_perm_r[i]] = i; 2481 } 2482 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2483 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2484 } else { 2485 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2486 } 2487 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2488 2489 if (!csorted) { 2490 if (isrow_s == iscol_s) { 2491 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2492 is_perm_c = is_perm_r; 2493 } else { 2494 PetscInt *idxs_c,i; 2495 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2496 for (i=0;i<csize;i++) { 2497 idxs_c[idxs_perm_c[i]] = i; 2498 } 2499 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2500 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2501 } 2502 } else { 2503 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2504 } 2505 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2506 2507 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2508 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2509 work_mat[0] = new_mat; 2510 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2511 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2512 } 2513 2514 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2515 *B = work_mat[0]; 2516 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2517 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2518 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2524 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2525 { 2526 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2527 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2528 Mat new_mat; 2529 IS is_local,is_global; 2530 PetscInt local_size; 2531 PetscBool isseqaij; 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2536 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2537 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2538 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2539 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2540 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2541 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2542 2543 /* check */ 2544 if (pcbddc->dbg_flag) { 2545 Vec x,x_change; 2546 PetscReal error; 2547 2548 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2549 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2550 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2551 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2552 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2553 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2554 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2555 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2556 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2557 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2558 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2559 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2560 ierr = VecDestroy(&x);CHKERRQ(ierr); 2561 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2562 } 2563 2564 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2565 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2566 if (isseqaij) { 2567 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2568 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2569 } else { 2570 Mat work_mat; 2571 2572 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2573 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2574 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2575 } 2576 if (matis->A->symmetric_set) { 2577 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2578 #if !defined(PETSC_USE_COMPLEX) 2579 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2580 #endif 2581 } 2582 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2588 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2589 { 2590 PC_IS* pcis = (PC_IS*)(pc->data); 2591 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2592 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2593 PetscInt *idx_R_local=NULL; 2594 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2595 PetscInt vbs,bs; 2596 PetscBT bitmask=NULL; 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 /* 2601 No need to setup local scatters if 2602 - primal space is unchanged 2603 AND 2604 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2605 AND 2606 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2607 */ 2608 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2609 PetscFunctionReturn(0); 2610 } 2611 /* destroy old objects */ 2612 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2613 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2614 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2615 /* Set Non-overlapping dimensions */ 2616 n_B = pcis->n_B; 2617 n_D = pcis->n - n_B; 2618 n_vertices = pcbddc->n_vertices; 2619 2620 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2621 2622 /* create auxiliary bitmask and allocate workspace */ 2623 if (!sub_schurs->reuse_solver) { 2624 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2625 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2626 for (i=0;i<n_vertices;i++) { 2627 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2628 } 2629 2630 for (i=0, n_R=0; i<pcis->n; i++) { 2631 if (!PetscBTLookup(bitmask,i)) { 2632 idx_R_local[n_R++] = i; 2633 } 2634 } 2635 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2636 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2637 2638 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2639 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2640 } 2641 2642 /* Block code */ 2643 vbs = 1; 2644 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2645 if (bs>1 && !(n_vertices%bs)) { 2646 PetscBool is_blocked = PETSC_TRUE; 2647 PetscInt *vary; 2648 if (!sub_schurs->reuse_solver) { 2649 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2650 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2651 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2652 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2653 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2654 for (i=0; i<pcis->n/bs; i++) { 2655 if (vary[i]!=0 && vary[i]!=bs) { 2656 is_blocked = PETSC_FALSE; 2657 break; 2658 } 2659 } 2660 ierr = PetscFree(vary);CHKERRQ(ierr); 2661 } else { 2662 /* Verify directly the R set */ 2663 for (i=0; i<n_R/bs; i++) { 2664 PetscInt j,node=idx_R_local[bs*i]; 2665 for (j=1; j<bs; j++) { 2666 if (node != idx_R_local[bs*i+j]-j) { 2667 is_blocked = PETSC_FALSE; 2668 break; 2669 } 2670 } 2671 } 2672 } 2673 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2674 vbs = bs; 2675 for (i=0;i<n_R/vbs;i++) { 2676 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2677 } 2678 } 2679 } 2680 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2681 if (sub_schurs->reuse_solver) { 2682 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2683 2684 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2685 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2686 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2687 reuse_solver->is_R = pcbddc->is_R_local; 2688 } else { 2689 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2690 } 2691 2692 /* print some info if requested */ 2693 if (pcbddc->dbg_flag) { 2694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2696 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 } 2702 2703 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2704 if (!sub_schurs->reuse_solver) { 2705 IS is_aux1,is_aux2; 2706 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2707 2708 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2709 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2710 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2711 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2712 for (i=0; i<n_D; i++) { 2713 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2714 } 2715 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2716 for (i=0, j=0; i<n_R; i++) { 2717 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2718 aux_array1[j++] = i; 2719 } 2720 } 2721 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2722 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2723 for (i=0, j=0; i<n_B; i++) { 2724 if (!PetscBTLookup(bitmask,is_indices[i])) { 2725 aux_array2[j++] = i; 2726 } 2727 } 2728 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2729 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2730 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2731 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2732 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2733 2734 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2735 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2736 for (i=0, j=0; i<n_R; i++) { 2737 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2738 aux_array1[j++] = i; 2739 } 2740 } 2741 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2742 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2743 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2744 } 2745 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2746 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2747 } else { 2748 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2749 IS tis; 2750 PetscInt schur_size; 2751 2752 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2753 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2754 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2755 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2756 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2757 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2758 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2759 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2760 } 2761 } 2762 PetscFunctionReturn(0); 2763 } 2764 2765 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2768 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2769 { 2770 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2771 PC_IS *pcis = (PC_IS*)pc->data; 2772 PC pc_temp; 2773 Mat A_RR; 2774 MatReuse reuse; 2775 PetscScalar m_one = -1.0; 2776 PetscReal value; 2777 PetscInt n_D,n_R; 2778 PetscBool use_exact,use_exact_reduced,issbaij; 2779 PetscErrorCode ierr; 2780 /* prefixes stuff */ 2781 char dir_prefix[256],neu_prefix[256],str_level[16]; 2782 size_t len; 2783 2784 PetscFunctionBegin; 2785 2786 /* compute prefixes */ 2787 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2788 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2789 if (!pcbddc->current_level) { 2790 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2791 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2792 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2793 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2794 } else { 2795 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2796 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2797 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2798 len -= 15; /* remove "pc_bddc_coarse_" */ 2799 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2800 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2801 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2802 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2803 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2804 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2805 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2806 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2807 } 2808 2809 /* DIRICHLET PROBLEM */ 2810 if (dirichlet) { 2811 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2812 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2813 if (!sub_schurs->reuse_solver) { 2814 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2815 } 2816 if (pcbddc->dbg_flag) { 2817 Mat A_IIn; 2818 2819 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2820 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2821 pcis->A_II = A_IIn; 2822 } 2823 } 2824 if (pcbddc->local_mat->symmetric_set) { 2825 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2826 } 2827 /* Matrix for Dirichlet problem is pcis->A_II */ 2828 n_D = pcis->n - pcis->n_B; 2829 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2830 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2831 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2832 /* default */ 2833 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2834 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2835 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2836 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2837 if (issbaij) { 2838 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2839 } else { 2840 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2841 } 2842 /* Allow user's customization */ 2843 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2844 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2845 } 2846 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2847 if (sub_schurs->reuse_solver) { 2848 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2849 2850 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2851 } 2852 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2853 if (!n_D) { 2854 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2855 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2856 } 2857 /* Set Up KSP for Dirichlet problem of BDDC */ 2858 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2859 /* set ksp_D into pcis data */ 2860 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2861 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2862 pcis->ksp_D = pcbddc->ksp_D; 2863 } 2864 2865 /* NEUMANN PROBLEM */ 2866 A_RR = 0; 2867 if (neumann) { 2868 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2869 PetscInt ibs,mbs; 2870 PetscBool issbaij; 2871 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2872 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2873 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2874 if (pcbddc->ksp_R) { /* already created ksp */ 2875 PetscInt nn_R; 2876 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2877 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2878 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2879 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2880 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2881 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2882 reuse = MAT_INITIAL_MATRIX; 2883 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2884 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2885 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2886 reuse = MAT_INITIAL_MATRIX; 2887 } else { /* safe to reuse the matrix */ 2888 reuse = MAT_REUSE_MATRIX; 2889 } 2890 } 2891 /* last check */ 2892 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2893 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2894 reuse = MAT_INITIAL_MATRIX; 2895 } 2896 } else { /* first time, so we need to create the matrix */ 2897 reuse = MAT_INITIAL_MATRIX; 2898 } 2899 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2900 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2901 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2902 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2903 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2904 if (matis->A == pcbddc->local_mat) { 2905 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2906 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2907 } else { 2908 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2909 } 2910 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2911 if (matis->A == pcbddc->local_mat) { 2912 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2913 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2914 } else { 2915 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2916 } 2917 } 2918 /* extract A_RR */ 2919 if (sub_schurs->reuse_solver) { 2920 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2921 2922 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2923 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2924 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2925 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2926 } else { 2927 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2928 } 2929 } else { 2930 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2931 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2932 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2933 } 2934 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2935 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2936 } 2937 if (pcbddc->local_mat->symmetric_set) { 2938 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2939 } 2940 if (!pcbddc->ksp_R) { /* create object if not present */ 2941 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2942 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2943 /* default */ 2944 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2945 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2946 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2947 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2948 if (issbaij) { 2949 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2950 } else { 2951 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2952 } 2953 /* Allow user's customization */ 2954 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2955 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2956 } 2957 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2958 if (!n_R) { 2959 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2960 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2961 } 2962 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2963 /* Reuse solver if it is present */ 2964 if (sub_schurs->reuse_solver) { 2965 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2966 2967 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2968 } 2969 /* Set Up KSP for Neumann problem of BDDC */ 2970 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2971 } 2972 2973 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2974 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2975 if (pcbddc->dbg_flag) { 2976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2977 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2978 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2979 } 2980 if (dirichlet) { /* Dirichlet */ 2981 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2982 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2983 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2984 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2985 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2986 /* need to be adapted? */ 2987 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2988 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2989 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2990 /* print info */ 2991 if (pcbddc->dbg_flag) { 2992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2994 } 2995 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2996 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2997 } 2998 } 2999 if (neumann) { /* Neumann */ 3000 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3001 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3002 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3003 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3004 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3005 /* need to be adapted? */ 3006 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3007 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3008 /* print info */ 3009 if (pcbddc->dbg_flag) { 3010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3011 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3012 } 3013 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3014 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3015 } 3016 } 3017 } 3018 /* free Neumann problem's matrix */ 3019 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3025 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3026 { 3027 PetscErrorCode ierr; 3028 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3029 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3030 3031 PetscFunctionBegin; 3032 if (!sub_schurs->reuse_solver) { 3033 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3034 } 3035 if (!pcbddc->switch_static) { 3036 if (applytranspose && pcbddc->local_auxmat1) { 3037 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3038 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3039 } 3040 if (!sub_schurs->reuse_solver) { 3041 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3043 } else { 3044 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3045 3046 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3048 } 3049 } else { 3050 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3051 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3052 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3053 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3054 if (applytranspose && pcbddc->local_auxmat1) { 3055 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3056 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3057 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 } 3060 } 3061 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3062 if (applytranspose) { 3063 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3064 } else { 3065 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3066 } 3067 } else { 3068 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3069 3070 if (applytranspose) { 3071 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3072 } else { 3073 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3074 } 3075 } 3076 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3077 if (!pcbddc->switch_static) { 3078 if (!sub_schurs->reuse_solver) { 3079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3080 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3081 } else { 3082 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3083 3084 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3086 } 3087 if (!applytranspose && pcbddc->local_auxmat1) { 3088 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3089 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3090 } 3091 } else { 3092 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3096 if (!applytranspose && pcbddc->local_auxmat1) { 3097 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3098 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3099 } 3100 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 } 3105 PetscFunctionReturn(0); 3106 } 3107 3108 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3109 #undef __FUNCT__ 3110 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3111 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3112 { 3113 PetscErrorCode ierr; 3114 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3115 PC_IS* pcis = (PC_IS*) (pc->data); 3116 const PetscScalar zero = 0.0; 3117 3118 PetscFunctionBegin; 3119 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3120 if (!pcbddc->benign_apply_coarse_only) { 3121 if (applytranspose) { 3122 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3123 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3124 } else { 3125 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3126 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3127 } 3128 } else { 3129 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3130 } 3131 3132 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3133 if (pcbddc->benign_n) { 3134 PetscScalar *array; 3135 PetscInt j; 3136 3137 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3138 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3139 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3140 } 3141 3142 /* start communications from local primal nodes to rhs of coarse solver */ 3143 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3144 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3146 3147 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3148 /* TODO remove null space when doing multilevel */ 3149 if (pcbddc->coarse_ksp) { 3150 Mat coarse_mat; 3151 Vec rhs,sol; 3152 MatNullSpace nullsp; 3153 3154 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3155 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3156 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3157 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3158 if (nullsp) { 3159 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3160 } 3161 if (applytranspose) { 3162 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3163 } else { 3164 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3165 } 3166 if (nullsp) { 3167 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3168 } 3169 } 3170 3171 /* Local solution on R nodes */ 3172 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3173 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3174 } 3175 /* communications from coarse sol to local primal nodes */ 3176 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3177 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3178 3179 /* Sum contributions from the two levels */ 3180 if (!pcbddc->benign_apply_coarse_only) { 3181 if (applytranspose) { 3182 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3183 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3184 } else { 3185 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3186 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3187 } 3188 /* store p0 */ 3189 if (pcbddc->benign_n) { 3190 PetscScalar *array; 3191 PetscInt j; 3192 3193 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3194 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3195 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3196 } 3197 } else { /* expand the coarse solution */ 3198 if (applytranspose) { 3199 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3200 } else { 3201 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3202 } 3203 } 3204 PetscFunctionReturn(0); 3205 } 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3209 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3210 { 3211 PetscErrorCode ierr; 3212 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3213 PetscScalar *array; 3214 Vec from,to; 3215 3216 PetscFunctionBegin; 3217 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3218 from = pcbddc->coarse_vec; 3219 to = pcbddc->vec1_P; 3220 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3221 Vec tvec; 3222 3223 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3224 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3225 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3226 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3227 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3228 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3229 } 3230 } else { /* from local to global -> put data in coarse right hand side */ 3231 from = pcbddc->vec1_P; 3232 to = pcbddc->coarse_vec; 3233 } 3234 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3235 PetscFunctionReturn(0); 3236 } 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3240 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3241 { 3242 PetscErrorCode ierr; 3243 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3244 PetscScalar *array; 3245 Vec from,to; 3246 3247 PetscFunctionBegin; 3248 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3249 from = pcbddc->coarse_vec; 3250 to = pcbddc->vec1_P; 3251 } else { /* from local to global -> put data in coarse right hand side */ 3252 from = pcbddc->vec1_P; 3253 to = pcbddc->coarse_vec; 3254 } 3255 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3256 if (smode == SCATTER_FORWARD) { 3257 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3258 Vec tvec; 3259 3260 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3261 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3262 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3263 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3264 } 3265 } else { 3266 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3267 ierr = VecResetArray(from);CHKERRQ(ierr); 3268 } 3269 } 3270 PetscFunctionReturn(0); 3271 } 3272 3273 /* uncomment for testing purposes */ 3274 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3277 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3278 { 3279 PetscErrorCode ierr; 3280 PC_IS* pcis = (PC_IS*)(pc->data); 3281 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3282 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3283 /* one and zero */ 3284 PetscScalar one=1.0,zero=0.0; 3285 /* space to store constraints and their local indices */ 3286 PetscScalar *constraints_data; 3287 PetscInt *constraints_idxs,*constraints_idxs_B; 3288 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3289 PetscInt *constraints_n; 3290 /* iterators */ 3291 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3292 /* BLAS integers */ 3293 PetscBLASInt lwork,lierr; 3294 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3295 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3296 /* reuse */ 3297 PetscInt olocal_primal_size,olocal_primal_size_cc; 3298 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3299 /* change of basis */ 3300 PetscBool qr_needed; 3301 PetscBT change_basis,qr_needed_idx; 3302 /* auxiliary stuff */ 3303 PetscInt *nnz,*is_indices; 3304 PetscInt ncc; 3305 /* some quantities */ 3306 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3307 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3308 3309 PetscFunctionBegin; 3310 /* Destroy Mat objects computed previously */ 3311 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3312 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3313 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3314 /* save info on constraints from previous setup (if any) */ 3315 olocal_primal_size = pcbddc->local_primal_size; 3316 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3317 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3318 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3319 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3320 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3321 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3322 3323 /* print some info */ 3324 if (pcbddc->dbg_flag) { 3325 IS vertices; 3326 PetscInt nv,nedges,nfaces; 3327 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3328 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3329 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3330 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3331 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3333 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3334 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3335 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3336 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3337 } 3338 3339 if (!pcbddc->adaptive_selection) { 3340 IS ISForVertices,*ISForFaces,*ISForEdges; 3341 MatNullSpace nearnullsp; 3342 const Vec *nearnullvecs; 3343 Vec *localnearnullsp; 3344 PetscScalar *array; 3345 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3346 PetscBool nnsp_has_cnst; 3347 /* LAPACK working arrays for SVD or POD */ 3348 PetscBool skip_lapack,boolforchange; 3349 PetscScalar *work; 3350 PetscReal *singular_vals; 3351 #if defined(PETSC_USE_COMPLEX) 3352 PetscReal *rwork; 3353 #endif 3354 #if defined(PETSC_MISSING_LAPACK_GESVD) 3355 PetscScalar *temp_basis,*correlation_mat; 3356 #else 3357 PetscBLASInt dummy_int=1; 3358 PetscScalar dummy_scalar=1.; 3359 #endif 3360 3361 /* Get index sets for faces, edges and vertices from graph */ 3362 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3363 /* free unneeded index sets */ 3364 if (!pcbddc->use_vertices) { 3365 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3366 } 3367 if (!pcbddc->use_edges) { 3368 for (i=0;i<n_ISForEdges;i++) { 3369 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3370 } 3371 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3372 n_ISForEdges = 0; 3373 } 3374 if (!pcbddc->use_faces) { 3375 for (i=0;i<n_ISForFaces;i++) { 3376 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3377 } 3378 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3379 n_ISForFaces = 0; 3380 } 3381 3382 #if defined(PETSC_USE_DEBUG) 3383 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3384 Also use_change_of_basis should be consistent among processors */ 3385 if (pcbddc->NullSpace) { 3386 PetscBool tbool[2],gbool[2]; 3387 3388 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3389 pcbddc->use_change_of_basis = PETSC_TRUE; 3390 if (!ISForEdges) { 3391 pcbddc->use_change_on_faces = PETSC_TRUE; 3392 } 3393 } 3394 tbool[0] = pcbddc->use_change_of_basis; 3395 tbool[1] = pcbddc->use_change_on_faces; 3396 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3397 pcbddc->use_change_of_basis = gbool[0]; 3398 pcbddc->use_change_on_faces = gbool[1]; 3399 } 3400 #endif 3401 3402 /* check if near null space is attached to global mat */ 3403 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3404 if (nearnullsp) { 3405 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3406 /* remove any stored info */ 3407 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3408 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3409 /* store information for BDDC solver reuse */ 3410 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3411 pcbddc->onearnullspace = nearnullsp; 3412 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3413 for (i=0;i<nnsp_size;i++) { 3414 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3415 } 3416 } else { /* if near null space is not provided BDDC uses constants by default */ 3417 nnsp_size = 0; 3418 nnsp_has_cnst = PETSC_TRUE; 3419 } 3420 /* get max number of constraints on a single cc */ 3421 max_constraints = nnsp_size; 3422 if (nnsp_has_cnst) max_constraints++; 3423 3424 /* 3425 Evaluate maximum storage size needed by the procedure 3426 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3427 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3428 There can be multiple constraints per connected component 3429 */ 3430 n_vertices = 0; 3431 if (ISForVertices) { 3432 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3433 } 3434 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3435 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3436 3437 total_counts = n_ISForFaces+n_ISForEdges; 3438 total_counts *= max_constraints; 3439 total_counts += n_vertices; 3440 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3441 3442 total_counts = 0; 3443 max_size_of_constraint = 0; 3444 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3445 IS used_is; 3446 if (i<n_ISForEdges) { 3447 used_is = ISForEdges[i]; 3448 } else { 3449 used_is = ISForFaces[i-n_ISForEdges]; 3450 } 3451 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3452 total_counts += j; 3453 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3454 } 3455 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3456 3457 /* get local part of global near null space vectors */ 3458 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3459 for (k=0;k<nnsp_size;k++) { 3460 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3461 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3462 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3463 } 3464 3465 /* whether or not to skip lapack calls */ 3466 skip_lapack = PETSC_TRUE; 3467 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3468 3469 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3470 if (!skip_lapack) { 3471 PetscScalar temp_work; 3472 3473 #if defined(PETSC_MISSING_LAPACK_GESVD) 3474 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3475 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3476 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3478 #if defined(PETSC_USE_COMPLEX) 3479 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3480 #endif 3481 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3482 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3483 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3484 lwork = -1; 3485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3486 #if !defined(PETSC_USE_COMPLEX) 3487 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3488 #else 3489 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3490 #endif 3491 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3492 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3493 #else /* on missing GESVD */ 3494 /* SVD */ 3495 PetscInt max_n,min_n; 3496 max_n = max_size_of_constraint; 3497 min_n = max_constraints; 3498 if (max_size_of_constraint < max_constraints) { 3499 min_n = max_size_of_constraint; 3500 max_n = max_constraints; 3501 } 3502 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3503 #if defined(PETSC_USE_COMPLEX) 3504 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3505 #endif 3506 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3507 lwork = -1; 3508 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3509 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3510 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3512 #if !defined(PETSC_USE_COMPLEX) 3513 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3514 #else 3515 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3516 #endif 3517 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3518 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3519 #endif /* on missing GESVD */ 3520 /* Allocate optimal workspace */ 3521 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3522 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3523 } 3524 /* Now we can loop on constraining sets */ 3525 total_counts = 0; 3526 constraints_idxs_ptr[0] = 0; 3527 constraints_data_ptr[0] = 0; 3528 /* vertices */ 3529 if (n_vertices) { 3530 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3531 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3532 for (i=0;i<n_vertices;i++) { 3533 constraints_n[total_counts] = 1; 3534 constraints_data[total_counts] = 1.0; 3535 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3536 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3537 total_counts++; 3538 } 3539 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3540 n_vertices = total_counts; 3541 } 3542 3543 /* edges and faces */ 3544 total_counts_cc = total_counts; 3545 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3546 IS used_is; 3547 PetscBool idxs_copied = PETSC_FALSE; 3548 3549 if (ncc<n_ISForEdges) { 3550 used_is = ISForEdges[ncc]; 3551 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3552 } else { 3553 used_is = ISForFaces[ncc-n_ISForEdges]; 3554 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3555 } 3556 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3557 3558 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3559 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3560 /* change of basis should not be performed on local periodic nodes */ 3561 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3562 if (nnsp_has_cnst) { 3563 PetscScalar quad_value; 3564 3565 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3566 idxs_copied = PETSC_TRUE; 3567 3568 if (!pcbddc->use_nnsp_true) { 3569 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3570 } else { 3571 quad_value = 1.0; 3572 } 3573 for (j=0;j<size_of_constraint;j++) { 3574 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3575 } 3576 temp_constraints++; 3577 total_counts++; 3578 } 3579 for (k=0;k<nnsp_size;k++) { 3580 PetscReal real_value; 3581 PetscScalar *ptr_to_data; 3582 3583 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3584 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3585 for (j=0;j<size_of_constraint;j++) { 3586 ptr_to_data[j] = array[is_indices[j]]; 3587 } 3588 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3589 /* check if array is null on the connected component */ 3590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3591 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3592 if (real_value > 0.0) { /* keep indices and values */ 3593 temp_constraints++; 3594 total_counts++; 3595 if (!idxs_copied) { 3596 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3597 idxs_copied = PETSC_TRUE; 3598 } 3599 } 3600 } 3601 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3602 valid_constraints = temp_constraints; 3603 if (!pcbddc->use_nnsp_true && temp_constraints) { 3604 if (temp_constraints == 1) { /* just normalize the constraint */ 3605 PetscScalar norm,*ptr_to_data; 3606 3607 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3609 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3610 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3611 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3612 } else { /* perform SVD */ 3613 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3614 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3615 3616 #if defined(PETSC_MISSING_LAPACK_GESVD) 3617 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3618 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3619 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3620 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3621 from that computed using LAPACKgesvd 3622 -> This is due to a different computation of eigenvectors in LAPACKheev 3623 -> The quality of the POD-computed basis will be the same */ 3624 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3625 /* Store upper triangular part of correlation matrix */ 3626 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3627 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3628 for (j=0;j<temp_constraints;j++) { 3629 for (k=0;k<j+1;k++) { 3630 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3631 } 3632 } 3633 /* compute eigenvalues and eigenvectors of correlation matrix */ 3634 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3635 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3636 #if !defined(PETSC_USE_COMPLEX) 3637 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3638 #else 3639 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3640 #endif 3641 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3642 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3643 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3644 j = 0; 3645 while (j < temp_constraints && singular_vals[j] < tol) j++; 3646 total_counts = total_counts-j; 3647 valid_constraints = temp_constraints-j; 3648 /* scale and copy POD basis into used quadrature memory */ 3649 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3650 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3651 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3652 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3653 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3655 if (j<temp_constraints) { 3656 PetscInt ii; 3657 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3658 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3659 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3660 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3661 for (k=0;k<temp_constraints-j;k++) { 3662 for (ii=0;ii<size_of_constraint;ii++) { 3663 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3664 } 3665 } 3666 } 3667 #else /* on missing GESVD */ 3668 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3669 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3670 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3671 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3672 #if !defined(PETSC_USE_COMPLEX) 3673 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3674 #else 3675 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3676 #endif 3677 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3678 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3679 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3680 k = temp_constraints; 3681 if (k > size_of_constraint) k = size_of_constraint; 3682 j = 0; 3683 while (j < k && singular_vals[k-j-1] < tol) j++; 3684 valid_constraints = k-j; 3685 total_counts = total_counts-temp_constraints+valid_constraints; 3686 #endif /* on missing GESVD */ 3687 } 3688 } 3689 /* update pointers information */ 3690 if (valid_constraints) { 3691 constraints_n[total_counts_cc] = valid_constraints; 3692 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3693 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3694 /* set change_of_basis flag */ 3695 if (boolforchange) { 3696 PetscBTSet(change_basis,total_counts_cc); 3697 } 3698 total_counts_cc++; 3699 } 3700 } 3701 /* free workspace */ 3702 if (!skip_lapack) { 3703 ierr = PetscFree(work);CHKERRQ(ierr); 3704 #if defined(PETSC_USE_COMPLEX) 3705 ierr = PetscFree(rwork);CHKERRQ(ierr); 3706 #endif 3707 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3708 #if defined(PETSC_MISSING_LAPACK_GESVD) 3709 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3710 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3711 #endif 3712 } 3713 for (k=0;k<nnsp_size;k++) { 3714 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3717 /* free index sets of faces, edges and vertices */ 3718 for (i=0;i<n_ISForFaces;i++) { 3719 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3720 } 3721 if (n_ISForFaces) { 3722 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3723 } 3724 for (i=0;i<n_ISForEdges;i++) { 3725 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3726 } 3727 if (n_ISForEdges) { 3728 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3729 } 3730 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3731 } else { 3732 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3733 3734 total_counts = 0; 3735 n_vertices = 0; 3736 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3737 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3738 } 3739 max_constraints = 0; 3740 total_counts_cc = 0; 3741 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3742 total_counts += pcbddc->adaptive_constraints_n[i]; 3743 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3744 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3745 } 3746 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3747 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3748 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3749 constraints_data = pcbddc->adaptive_constraints_data; 3750 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3751 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3752 total_counts_cc = 0; 3753 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3754 if (pcbddc->adaptive_constraints_n[i]) { 3755 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3756 } 3757 } 3758 #if 0 3759 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3760 for (i=0;i<total_counts_cc;i++) { 3761 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3762 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3763 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3764 printf(" %d",constraints_idxs[j]); 3765 } 3766 printf("\n"); 3767 printf("number of cc: %d\n",constraints_n[i]); 3768 } 3769 for (i=0;i<n_vertices;i++) { 3770 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3771 } 3772 for (i=0;i<sub_schurs->n_subs;i++) { 3773 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3774 } 3775 #endif 3776 3777 max_size_of_constraint = 0; 3778 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3779 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3780 /* Change of basis */ 3781 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3782 if (pcbddc->use_change_of_basis) { 3783 for (i=0;i<sub_schurs->n_subs;i++) { 3784 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3785 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3786 } 3787 } 3788 } 3789 } 3790 pcbddc->local_primal_size = total_counts; 3791 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3792 3793 /* map constraints_idxs in boundary numbering */ 3794 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3795 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3796 3797 /* Create constraint matrix */ 3798 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3799 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3800 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3801 3802 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3803 /* determine if a QR strategy is needed for change of basis */ 3804 qr_needed = PETSC_FALSE; 3805 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3806 total_primal_vertices=0; 3807 pcbddc->local_primal_size_cc = 0; 3808 for (i=0;i<total_counts_cc;i++) { 3809 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3810 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3811 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3812 pcbddc->local_primal_size_cc += 1; 3813 } else if (PetscBTLookup(change_basis,i)) { 3814 for (k=0;k<constraints_n[i];k++) { 3815 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3816 } 3817 pcbddc->local_primal_size_cc += constraints_n[i]; 3818 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3819 PetscBTSet(qr_needed_idx,i); 3820 qr_needed = PETSC_TRUE; 3821 } 3822 } else { 3823 pcbddc->local_primal_size_cc += 1; 3824 } 3825 } 3826 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3827 pcbddc->n_vertices = total_primal_vertices; 3828 /* permute indices in order to have a sorted set of vertices */ 3829 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3830 3831 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3832 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3833 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3834 3835 /* nonzero structure of constraint matrix */ 3836 /* and get reference dof for local constraints */ 3837 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3838 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3839 3840 j = total_primal_vertices; 3841 total_counts = total_primal_vertices; 3842 cum = total_primal_vertices; 3843 for (i=n_vertices;i<total_counts_cc;i++) { 3844 if (!PetscBTLookup(change_basis,i)) { 3845 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3846 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3847 cum++; 3848 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3849 for (k=0;k<constraints_n[i];k++) { 3850 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3851 nnz[j+k] = size_of_constraint; 3852 } 3853 j += constraints_n[i]; 3854 } 3855 } 3856 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3857 ierr = PetscFree(nnz);CHKERRQ(ierr); 3858 3859 /* set values in constraint matrix */ 3860 for (i=0;i<total_primal_vertices;i++) { 3861 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3862 } 3863 total_counts = total_primal_vertices; 3864 for (i=n_vertices;i<total_counts_cc;i++) { 3865 if (!PetscBTLookup(change_basis,i)) { 3866 PetscInt *cols; 3867 3868 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3869 cols = constraints_idxs+constraints_idxs_ptr[i]; 3870 for (k=0;k<constraints_n[i];k++) { 3871 PetscInt row = total_counts+k; 3872 PetscScalar *vals; 3873 3874 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3875 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3876 } 3877 total_counts += constraints_n[i]; 3878 } 3879 } 3880 /* assembling */ 3881 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 3884 /* 3885 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3886 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3887 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3888 */ 3889 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3890 if (pcbddc->use_change_of_basis) { 3891 /* dual and primal dofs on a single cc */ 3892 PetscInt dual_dofs,primal_dofs; 3893 /* working stuff for GEQRF */ 3894 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3895 PetscBLASInt lqr_work; 3896 /* working stuff for UNGQR */ 3897 PetscScalar *gqr_work,lgqr_work_t; 3898 PetscBLASInt lgqr_work; 3899 /* working stuff for TRTRS */ 3900 PetscScalar *trs_rhs; 3901 PetscBLASInt Blas_NRHS; 3902 /* pointers for values insertion into change of basis matrix */ 3903 PetscInt *start_rows,*start_cols; 3904 PetscScalar *start_vals; 3905 /* working stuff for values insertion */ 3906 PetscBT is_primal; 3907 PetscInt *aux_primal_numbering_B; 3908 /* matrix sizes */ 3909 PetscInt global_size,local_size; 3910 /* temporary change of basis */ 3911 Mat localChangeOfBasisMatrix; 3912 /* extra space for debugging */ 3913 PetscScalar *dbg_work; 3914 3915 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3916 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3917 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3918 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3919 /* nonzeros for local mat */ 3920 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3921 if (!pcbddc->benign_change || pcbddc->fake_change) { 3922 for (i=0;i<pcis->n;i++) nnz[i]=1; 3923 } else { 3924 const PetscInt *ii; 3925 PetscInt n; 3926 PetscBool flg_row; 3927 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3928 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3929 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3930 } 3931 for (i=n_vertices;i<total_counts_cc;i++) { 3932 if (PetscBTLookup(change_basis,i)) { 3933 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3934 if (PetscBTLookup(qr_needed_idx,i)) { 3935 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3936 } else { 3937 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3938 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3939 } 3940 } 3941 } 3942 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3943 ierr = PetscFree(nnz);CHKERRQ(ierr); 3944 /* Set interior change in the matrix */ 3945 if (!pcbddc->benign_change || pcbddc->fake_change) { 3946 for (i=0;i<pcis->n;i++) { 3947 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3948 } 3949 } else { 3950 const PetscInt *ii,*jj; 3951 PetscScalar *aa; 3952 PetscInt n; 3953 PetscBool flg_row; 3954 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3955 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3956 for (i=0;i<n;i++) { 3957 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3958 } 3959 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3960 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3961 } 3962 3963 if (pcbddc->dbg_flag) { 3964 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3966 } 3967 3968 3969 /* Now we loop on the constraints which need a change of basis */ 3970 /* 3971 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3972 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3973 3974 Basic blocks of change of basis matrix T computed by 3975 3976 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3977 3978 | 1 0 ... 0 s_1/S | 3979 | 0 1 ... 0 s_2/S | 3980 | ... | 3981 | 0 ... 1 s_{n-1}/S | 3982 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3983 3984 with S = \sum_{i=1}^n s_i^2 3985 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3986 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3987 3988 - QR decomposition of constraints otherwise 3989 */ 3990 if (qr_needed) { 3991 /* space to store Q */ 3992 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3993 /* first we issue queries for optimal work */ 3994 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3995 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3996 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3997 lqr_work = -1; 3998 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3999 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4000 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4001 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4002 lgqr_work = -1; 4003 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4006 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4007 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4008 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4009 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4010 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4011 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4012 /* array to store scaling factors for reflectors */ 4013 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4014 /* array to store rhs and solution of triangular solver */ 4015 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4016 /* allocating workspace for check */ 4017 if (pcbddc->dbg_flag) { 4018 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4019 } 4020 } 4021 /* array to store whether a node is primal or not */ 4022 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4023 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4024 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4025 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4026 for (i=0;i<total_primal_vertices;i++) { 4027 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4028 } 4029 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4030 4031 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4032 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4033 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4034 if (PetscBTLookup(change_basis,total_counts)) { 4035 /* get constraint info */ 4036 primal_dofs = constraints_n[total_counts]; 4037 dual_dofs = size_of_constraint-primal_dofs; 4038 4039 if (pcbddc->dbg_flag) { 4040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4041 } 4042 4043 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4044 4045 /* copy quadrature constraints for change of basis check */ 4046 if (pcbddc->dbg_flag) { 4047 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4048 } 4049 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4050 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4051 4052 /* compute QR decomposition of constraints */ 4053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4054 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4056 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4057 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4058 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4059 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4060 4061 /* explictly compute R^-T */ 4062 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4063 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4064 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4065 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4067 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4068 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4069 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4070 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4071 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4072 4073 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4076 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4078 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4079 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4080 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4081 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4082 4083 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4084 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4085 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4087 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4088 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4090 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4092 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4093 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4094 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4095 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4096 4097 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4098 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4099 /* insert cols for primal dofs */ 4100 for (j=0;j<primal_dofs;j++) { 4101 start_vals = &qr_basis[j*size_of_constraint]; 4102 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4103 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4104 } 4105 /* insert cols for dual dofs */ 4106 for (j=0,k=0;j<dual_dofs;k++) { 4107 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4108 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4109 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4110 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4111 j++; 4112 } 4113 } 4114 4115 /* check change of basis */ 4116 if (pcbddc->dbg_flag) { 4117 PetscInt ii,jj; 4118 PetscBool valid_qr=PETSC_TRUE; 4119 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4124 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4125 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4126 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4128 for (jj=0;jj<size_of_constraint;jj++) { 4129 for (ii=0;ii<primal_dofs;ii++) { 4130 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4131 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4132 } 4133 } 4134 if (!valid_qr) { 4135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4136 for (jj=0;jj<size_of_constraint;jj++) { 4137 for (ii=0;ii<primal_dofs;ii++) { 4138 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4139 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4140 } 4141 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4142 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4143 } 4144 } 4145 } 4146 } else { 4147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4148 } 4149 } 4150 } else { /* simple transformation block */ 4151 PetscInt row,col; 4152 PetscScalar val,norm; 4153 4154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4155 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4156 for (j=0;j<size_of_constraint;j++) { 4157 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4158 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4159 if (!PetscBTLookup(is_primal,row_B)) { 4160 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4161 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4162 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4163 } else { 4164 for (k=0;k<size_of_constraint;k++) { 4165 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4166 if (row != col) { 4167 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4168 } else { 4169 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4170 } 4171 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4172 } 4173 } 4174 } 4175 if (pcbddc->dbg_flag) { 4176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4177 } 4178 } 4179 } else { 4180 if (pcbddc->dbg_flag) { 4181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4182 } 4183 } 4184 } 4185 4186 /* free workspace */ 4187 if (qr_needed) { 4188 if (pcbddc->dbg_flag) { 4189 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4190 } 4191 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4192 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4193 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4194 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4195 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4196 } 4197 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4198 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4199 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4200 4201 /* assembling of global change of variable */ 4202 if (!pcbddc->fake_change) { 4203 Mat tmat; 4204 PetscInt bs; 4205 4206 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4207 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4208 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4209 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4210 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4211 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4212 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4213 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4214 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4215 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4216 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4217 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4218 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4219 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4220 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4221 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4222 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4223 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4224 4225 /* check */ 4226 if (pcbddc->dbg_flag) { 4227 PetscReal error; 4228 Vec x,x_change; 4229 4230 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4231 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4232 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4233 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4234 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4235 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4236 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4237 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4238 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4239 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4240 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4241 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4242 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4244 ierr = VecDestroy(&x);CHKERRQ(ierr); 4245 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4246 } 4247 /* adapt sub_schurs computed (if any) */ 4248 if (pcbddc->use_deluxe_scaling) { 4249 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4250 4251 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4252 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4253 } 4254 if (sub_schurs->S_Ej_all) { 4255 Mat S_new,tmat; 4256 IS is_all_N,is_V_Sall = NULL; 4257 4258 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4259 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4260 if (pcbddc->deluxe_zerorows) { 4261 ISLocalToGlobalMapping NtoSall; 4262 IS is_V; 4263 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4264 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4265 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4266 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4267 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4268 } 4269 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4270 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4271 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4272 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4273 if (pcbddc->deluxe_zerorows) { 4274 const PetscScalar *array; 4275 const PetscInt *idxs_V,*idxs_all; 4276 PetscInt i,n_V; 4277 4278 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4279 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4280 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4281 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4282 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4283 for (i=0;i<n_V;i++) { 4284 PetscScalar val; 4285 PetscInt idx; 4286 4287 idx = idxs_V[i]; 4288 val = array[idxs_all[idxs_V[i]]]; 4289 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4290 } 4291 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4292 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4293 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4294 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4295 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4296 } 4297 sub_schurs->S_Ej_all = S_new; 4298 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4299 if (sub_schurs->sum_S_Ej_all) { 4300 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4301 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4302 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4303 if (pcbddc->deluxe_zerorows) { 4304 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4305 } 4306 sub_schurs->sum_S_Ej_all = S_new; 4307 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4308 } 4309 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4310 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4311 } 4312 /* destroy any change of basis context in sub_schurs */ 4313 if (sub_schurs->change) { 4314 PetscInt i; 4315 4316 for (i=0;i<sub_schurs->n_subs;i++) { 4317 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4318 } 4319 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4320 } 4321 } 4322 if (pcbddc->switch_static) { /* need to save the local change */ 4323 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4324 } else { 4325 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4326 } 4327 /* determine if any process has changed the pressures locally */ 4328 if (pcbddc->benign_saddle_point) { 4329 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4330 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4331 } 4332 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4333 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4334 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4335 pcbddc->use_qr_single = qr_needed; 4336 } 4337 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4338 PetscBool needglobal = PETSC_FALSE; 4339 if (pcbddc->benign_saddle_point) { 4340 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4341 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4342 } 4343 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4344 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4345 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4346 } else { 4347 Mat benign_global = NULL; 4348 if (needglobal) { 4349 Mat tmat; 4350 4351 pcbddc->change_interior = PETSC_TRUE; 4352 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4353 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4354 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4355 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4356 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4357 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4358 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4359 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4360 if (pcbddc->benign_change) { 4361 Mat M; 4362 4363 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4364 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4365 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4366 ierr = MatDestroy(&M);CHKERRQ(ierr); 4367 } else { 4368 Mat eye; 4369 PetscScalar *array; 4370 4371 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4372 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4373 for (i=0;i<pcis->n;i++) { 4374 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4375 } 4376 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4377 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4380 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4381 } 4382 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4383 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4384 } 4385 if (pcbddc->user_ChangeOfBasisMatrix) { 4386 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4387 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4388 } else if (needglobal) { 4389 pcbddc->ChangeOfBasisMatrix = benign_global; 4390 } 4391 } 4392 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4393 IS is_global; 4394 const PetscInt *gidxs; 4395 4396 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4397 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4398 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4399 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4400 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4401 } 4402 } 4403 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4404 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4405 } 4406 4407 if (!pcbddc->fake_change) { 4408 /* add pressure dofs to set of primal nodes for numbering purposes */ 4409 for (i=0;i<pcbddc->benign_n;i++) { 4410 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4411 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4412 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4413 pcbddc->local_primal_size_cc++; 4414 pcbddc->local_primal_size++; 4415 } 4416 4417 /* check if a new primal space has been introduced (also take into account benign trick) */ 4418 pcbddc->new_primal_space_local = PETSC_TRUE; 4419 if (olocal_primal_size == pcbddc->local_primal_size) { 4420 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4421 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4422 if (!pcbddc->new_primal_space_local) { 4423 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4424 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4425 } 4426 } 4427 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4428 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4429 } 4430 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4431 4432 /* flush dbg viewer */ 4433 if (pcbddc->dbg_flag) { 4434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4435 } 4436 4437 /* free workspace */ 4438 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4439 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4440 if (!pcbddc->adaptive_selection) { 4441 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4442 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4443 } else { 4444 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4445 pcbddc->adaptive_constraints_idxs_ptr, 4446 pcbddc->adaptive_constraints_data_ptr, 4447 pcbddc->adaptive_constraints_idxs, 4448 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4449 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4450 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4451 } 4452 PetscFunctionReturn(0); 4453 } 4454 4455 #undef __FUNCT__ 4456 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4457 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4458 { 4459 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4460 PC_IS *pcis = (PC_IS*)pc->data; 4461 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4462 PetscInt ierr,i,N; 4463 PetscViewer viewer=pcbddc->dbg_viewer; 4464 4465 PetscFunctionBegin; 4466 /* Reset previously computed graph */ 4467 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4468 /* Init local Graph struct */ 4469 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4470 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4471 4472 /* Check validity of the csr graph passed in by the user */ 4473 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4474 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4475 } 4476 4477 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4478 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4479 PetscInt *xadj,*adjncy; 4480 PetscInt nvtxs; 4481 PetscBool flg_row=PETSC_FALSE; 4482 4483 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4484 if (flg_row) { 4485 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4486 pcbddc->computed_rowadj = PETSC_TRUE; 4487 } 4488 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4489 } 4490 if (pcbddc->dbg_flag) { 4491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 } 4493 4494 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4495 if (pcbddc->user_provided_isfordofs) { 4496 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4497 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4498 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4499 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4500 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4501 } 4502 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4503 pcbddc->n_ISForDofs = 0; 4504 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4505 } 4506 } else { 4507 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4508 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4509 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4510 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4511 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4512 } 4513 } 4514 } 4515 4516 /* Setup of Graph */ 4517 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4518 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4519 } 4520 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4521 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4522 } 4523 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4524 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4525 } 4526 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4527 4528 /* attach info on disconnected subdomains if present */ 4529 if (pcbddc->n_local_subs) { 4530 PetscInt *local_subs; 4531 4532 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4533 for (i=0;i<pcbddc->n_local_subs;i++) { 4534 const PetscInt *idxs; 4535 PetscInt nl,j; 4536 4537 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4538 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4539 for (j=0;j<nl;j++) { 4540 local_subs[idxs[j]] = i; 4541 } 4542 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4543 } 4544 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4545 pcbddc->mat_graph->local_subs = local_subs; 4546 } 4547 4548 /* Graph's connected components analysis */ 4549 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4550 4551 /* print some info to stdout */ 4552 if (pcbddc->dbg_flag) { 4553 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4554 } 4555 PetscFunctionReturn(0); 4556 } 4557 4558 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "PCBDDCSubsetNumbering" 4561 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4562 { 4563 PetscSF sf; 4564 PetscLayout map; 4565 const PetscInt *idxs; 4566 PetscInt *leaf_data,*root_data,*gidxs; 4567 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4568 PetscInt n_n,nlocals,start,first_index; 4569 PetscMPIInt commsize; 4570 PetscBool first_found; 4571 PetscErrorCode ierr; 4572 4573 PetscFunctionBegin; 4574 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4575 if (subset_mult) { 4576 PetscCheckSameComm(subset,1,subset_mult,2); 4577 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4578 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4579 } 4580 /* create workspace layout for computing global indices of subset */ 4581 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4582 lbounds[0] = lbounds[1] = 0; 4583 for (i=0;i<n;i++) { 4584 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4585 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4586 } 4587 lbounds[0] = -lbounds[0]; 4588 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4589 gbounds[0] = -gbounds[0]; 4590 N = gbounds[1] - gbounds[0] + 1; 4591 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4592 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4593 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4595 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4596 4597 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4598 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4599 if (subset_mult) { 4600 const PetscInt* idxs_mult; 4601 4602 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4603 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4604 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4605 } else { 4606 for (i=0;i<n;i++) leaf_data[i] = 1; 4607 } 4608 /* local size of new subset */ 4609 n_n = 0; 4610 for (i=0;i<n;i++) n_n += leaf_data[i]; 4611 4612 /* global indexes in layout */ 4613 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4614 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4615 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4616 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4617 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4618 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4619 4620 /* reduce from leaves to roots */ 4621 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4622 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4623 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4624 4625 /* count indexes in local part of layout */ 4626 nlocals = 0; 4627 first_index = -1; 4628 first_found = PETSC_FALSE; 4629 for (i=0;i<Nl;i++) { 4630 if (!first_found && root_data[i]) { 4631 first_found = PETSC_TRUE; 4632 first_index = i; 4633 } 4634 nlocals += root_data[i]; 4635 } 4636 4637 /* cumulative of number of indexes and size of subset without holes */ 4638 #if defined(PETSC_HAVE_MPI_EXSCAN) 4639 start = 0; 4640 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4641 #else 4642 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4643 start = start-nlocals; 4644 #endif 4645 4646 if (N_n) { /* compute total size of new subset if requested */ 4647 *N_n = start + nlocals; 4648 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4649 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4650 } 4651 4652 /* adapt root data with cumulative */ 4653 if (first_found) { 4654 PetscInt old_index; 4655 4656 root_data[first_index] += start; 4657 old_index = first_index; 4658 for (i=first_index+1;i<Nl;i++) { 4659 if (root_data[i]) { 4660 root_data[i] += root_data[old_index]; 4661 old_index = i; 4662 } 4663 } 4664 } 4665 4666 /* from roots to leaves */ 4667 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4668 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4669 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4670 4671 /* create new IS with global indexes without holes */ 4672 if (subset_mult) { 4673 const PetscInt* idxs_mult; 4674 PetscInt cum; 4675 4676 cum = 0; 4677 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4678 for (i=0;i<n;i++) { 4679 PetscInt j; 4680 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4681 } 4682 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4683 } else { 4684 for (i=0;i<n;i++) { 4685 gidxs[i] = leaf_data[i]-1; 4686 } 4687 } 4688 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4689 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4690 PetscFunctionReturn(0); 4691 } 4692 4693 #undef __FUNCT__ 4694 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4695 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4696 { 4697 PetscInt i,j; 4698 PetscScalar *alphas; 4699 PetscErrorCode ierr; 4700 4701 PetscFunctionBegin; 4702 /* this implements stabilized Gram-Schmidt */ 4703 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4704 for (i=0;i<n;i++) { 4705 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4706 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4707 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4708 } 4709 ierr = PetscFree(alphas);CHKERRQ(ierr); 4710 PetscFunctionReturn(0); 4711 } 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatISGetSubassemblingPattern" 4715 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4716 { 4717 IS ranks_send_to; 4718 PetscInt n_neighs,*neighs,*n_shared,**shared; 4719 PetscMPIInt size,rank,color; 4720 PetscInt *xadj,*adjncy; 4721 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4722 PetscInt i,local_size,threshold=0; 4723 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4724 PetscSubcomm subcomm; 4725 PetscErrorCode ierr; 4726 4727 PetscFunctionBegin; 4728 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4729 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4730 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4731 4732 /* Get info on mapping */ 4733 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4734 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4735 4736 /* build local CSR graph of subdomains' connectivity */ 4737 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4738 xadj[0] = 0; 4739 xadj[1] = PetscMax(n_neighs-1,0); 4740 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4741 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4742 4743 if (threshold) { 4744 PetscInt xadj_count = 0; 4745 for (i=1;i<n_neighs;i++) { 4746 if (n_shared[i] > threshold) { 4747 adjncy[xadj_count] = neighs[i]; 4748 adjncy_wgt[xadj_count] = n_shared[i]; 4749 xadj_count++; 4750 } 4751 } 4752 xadj[1] = xadj_count; 4753 } else { 4754 if (xadj[1]) { 4755 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4756 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4757 } 4758 } 4759 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4760 if (use_square) { 4761 for (i=0;i<xadj[1];i++) { 4762 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4763 } 4764 } 4765 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4766 4767 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4768 4769 /* 4770 Restrict work on active processes only. 4771 */ 4772 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4773 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4774 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4775 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4776 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4777 if (color) { 4778 ierr = PetscFree(xadj);CHKERRQ(ierr); 4779 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4780 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4781 } else { 4782 Mat subdomain_adj; 4783 IS new_ranks,new_ranks_contig; 4784 MatPartitioning partitioner; 4785 PetscInt prank,rstart=0,rend=0; 4786 PetscInt *is_indices,*oldranks; 4787 PetscBool aggregate; 4788 4789 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4790 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4791 prank = rank; 4792 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4793 /* 4794 for (i=0;i<size;i++) { 4795 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4796 } 4797 */ 4798 for (i=0;i<xadj[1];i++) { 4799 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4800 } 4801 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4802 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4803 if (aggregate) { 4804 PetscInt lrows,row,ncols,*cols; 4805 PetscMPIInt nrank; 4806 PetscScalar *vals; 4807 4808 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4809 lrows = 0; 4810 if (nrank<redprocs) { 4811 lrows = size/redprocs; 4812 if (nrank<size%redprocs) lrows++; 4813 } 4814 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4815 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4816 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4817 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4818 row = nrank; 4819 ncols = xadj[1]-xadj[0]; 4820 cols = adjncy; 4821 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4822 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4823 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4824 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4825 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4826 ierr = PetscFree(xadj);CHKERRQ(ierr); 4827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4829 ierr = PetscFree(vals);CHKERRQ(ierr); 4830 } else { 4831 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4832 } 4833 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4834 4835 /* Partition */ 4836 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4837 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4838 if (use_vwgt) { 4839 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4840 v_wgt[0] = local_size; 4841 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4842 } 4843 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4844 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4845 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4846 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4847 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4848 4849 /* renumber new_ranks to avoid "holes" in new set of processors */ 4850 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4851 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4852 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4853 if (!redprocs) { 4854 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4855 } else { 4856 PetscInt idxs[1]; 4857 PetscMPIInt tag; 4858 MPI_Request *reqs; 4859 4860 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4861 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4862 for (i=rstart;i<rend;i++) { 4863 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4864 } 4865 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4866 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4867 ierr = PetscFree(reqs);CHKERRQ(ierr); 4868 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4869 } 4870 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4871 /* clean up */ 4872 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4873 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4874 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4875 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4876 } 4877 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4878 4879 /* assemble parallel IS for sends */ 4880 i = 1; 4881 if (color) i=0; 4882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4883 /* get back IS */ 4884 *is_sends = ranks_send_to; 4885 PetscFunctionReturn(0); 4886 } 4887 4888 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatISSubassemble" 4892 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4893 { 4894 Mat local_mat; 4895 IS is_sends_internal; 4896 PetscInt rows,cols,new_local_rows; 4897 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4898 PetscBool ismatis,isdense,newisdense,destroy_mat; 4899 ISLocalToGlobalMapping l2gmap; 4900 PetscInt* l2gmap_indices; 4901 const PetscInt* is_indices; 4902 MatType new_local_type; 4903 /* buffers */ 4904 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4905 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4906 PetscInt *recv_buffer_idxs_local; 4907 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4908 /* MPI */ 4909 MPI_Comm comm,comm_n; 4910 PetscSubcomm subcomm; 4911 PetscMPIInt n_sends,n_recvs,commsize; 4912 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4913 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4914 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4915 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4916 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4917 PetscErrorCode ierr; 4918 4919 PetscFunctionBegin; 4920 /* TODO: add missing checks */ 4921 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4922 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4923 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4924 PetscValidLogicalCollectiveInt(mat,nis,7); 4925 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4926 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4927 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4928 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4929 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4930 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4931 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4932 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4933 PetscInt mrows,mcols,mnrows,mncols; 4934 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4935 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4936 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4937 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4938 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4939 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4940 } 4941 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4942 PetscValidLogicalCollectiveInt(mat,bs,0); 4943 /* prepare IS for sending if not provided */ 4944 if (!is_sends) { 4945 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4946 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4947 } else { 4948 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4949 is_sends_internal = is_sends; 4950 } 4951 4952 /* get comm */ 4953 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4954 4955 /* compute number of sends */ 4956 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4957 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4958 4959 /* compute number of receives */ 4960 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4961 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4962 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4963 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4964 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4965 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4966 ierr = PetscFree(iflags);CHKERRQ(ierr); 4967 4968 /* restrict comm if requested */ 4969 subcomm = 0; 4970 destroy_mat = PETSC_FALSE; 4971 if (restrict_comm) { 4972 PetscMPIInt color,subcommsize; 4973 4974 color = 0; 4975 if (restrict_full) { 4976 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4977 } else { 4978 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4979 } 4980 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4981 subcommsize = commsize - subcommsize; 4982 /* check if reuse has been requested */ 4983 if (reuse == MAT_REUSE_MATRIX) { 4984 if (*mat_n) { 4985 PetscMPIInt subcommsize2; 4986 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4987 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4988 comm_n = PetscObjectComm((PetscObject)*mat_n); 4989 } else { 4990 comm_n = PETSC_COMM_SELF; 4991 } 4992 } else { /* MAT_INITIAL_MATRIX */ 4993 PetscMPIInt rank; 4994 4995 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4996 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4997 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4998 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4999 comm_n = PetscSubcommChild(subcomm); 5000 } 5001 /* flag to destroy *mat_n if not significative */ 5002 if (color) destroy_mat = PETSC_TRUE; 5003 } else { 5004 comm_n = comm; 5005 } 5006 5007 /* prepare send/receive buffers */ 5008 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5009 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5010 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5011 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5012 if (nis) { 5013 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5014 } 5015 5016 /* Get data from local matrices */ 5017 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5018 /* TODO: See below some guidelines on how to prepare the local buffers */ 5019 /* 5020 send_buffer_vals should contain the raw values of the local matrix 5021 send_buffer_idxs should contain: 5022 - MatType_PRIVATE type 5023 - PetscInt size_of_l2gmap 5024 - PetscInt global_row_indices[size_of_l2gmap] 5025 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5026 */ 5027 else { 5028 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5029 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5030 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5031 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5032 send_buffer_idxs[1] = i; 5033 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5034 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5035 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5036 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5037 for (i=0;i<n_sends;i++) { 5038 ilengths_vals[is_indices[i]] = len*len; 5039 ilengths_idxs[is_indices[i]] = len+2; 5040 } 5041 } 5042 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5043 /* additional is (if any) */ 5044 if (nis) { 5045 PetscMPIInt psum; 5046 PetscInt j; 5047 for (j=0,psum=0;j<nis;j++) { 5048 PetscInt plen; 5049 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5050 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5051 psum += len+1; /* indices + lenght */ 5052 } 5053 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5054 for (j=0,psum=0;j<nis;j++) { 5055 PetscInt plen; 5056 const PetscInt *is_array_idxs; 5057 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5058 send_buffer_idxs_is[psum] = plen; 5059 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5060 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5061 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5062 psum += plen+1; /* indices + lenght */ 5063 } 5064 for (i=0;i<n_sends;i++) { 5065 ilengths_idxs_is[is_indices[i]] = psum; 5066 } 5067 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5068 } 5069 5070 buf_size_idxs = 0; 5071 buf_size_vals = 0; 5072 buf_size_idxs_is = 0; 5073 for (i=0;i<n_recvs;i++) { 5074 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5075 buf_size_vals += (PetscInt)olengths_vals[i]; 5076 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5077 } 5078 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5079 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5080 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5081 5082 /* get new tags for clean communications */ 5083 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5084 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5085 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5086 5087 /* allocate for requests */ 5088 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5089 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5090 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5091 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5092 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5093 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5094 5095 /* communications */ 5096 ptr_idxs = recv_buffer_idxs; 5097 ptr_vals = recv_buffer_vals; 5098 ptr_idxs_is = recv_buffer_idxs_is; 5099 for (i=0;i<n_recvs;i++) { 5100 source_dest = onodes[i]; 5101 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5102 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5103 ptr_idxs += olengths_idxs[i]; 5104 ptr_vals += olengths_vals[i]; 5105 if (nis) { 5106 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5107 ptr_idxs_is += olengths_idxs_is[i]; 5108 } 5109 } 5110 for (i=0;i<n_sends;i++) { 5111 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5112 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5113 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5114 if (nis) { 5115 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5116 } 5117 } 5118 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5119 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5120 5121 /* assemble new l2g map */ 5122 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5123 ptr_idxs = recv_buffer_idxs; 5124 new_local_rows = 0; 5125 for (i=0;i<n_recvs;i++) { 5126 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5127 ptr_idxs += olengths_idxs[i]; 5128 } 5129 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5130 ptr_idxs = recv_buffer_idxs; 5131 new_local_rows = 0; 5132 for (i=0;i<n_recvs;i++) { 5133 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5134 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5135 ptr_idxs += olengths_idxs[i]; 5136 } 5137 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5138 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5139 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5140 5141 /* infer new local matrix type from received local matrices type */ 5142 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5143 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5144 if (n_recvs) { 5145 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5146 ptr_idxs = recv_buffer_idxs; 5147 for (i=0;i<n_recvs;i++) { 5148 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5149 new_local_type_private = MATAIJ_PRIVATE; 5150 break; 5151 } 5152 ptr_idxs += olengths_idxs[i]; 5153 } 5154 switch (new_local_type_private) { 5155 case MATDENSE_PRIVATE: 5156 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5157 new_local_type = MATSEQAIJ; 5158 bs = 1; 5159 } else { /* if I receive only 1 dense matrix */ 5160 new_local_type = MATSEQDENSE; 5161 bs = 1; 5162 } 5163 break; 5164 case MATAIJ_PRIVATE: 5165 new_local_type = MATSEQAIJ; 5166 bs = 1; 5167 break; 5168 case MATBAIJ_PRIVATE: 5169 new_local_type = MATSEQBAIJ; 5170 break; 5171 case MATSBAIJ_PRIVATE: 5172 new_local_type = MATSEQSBAIJ; 5173 break; 5174 default: 5175 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5176 break; 5177 } 5178 } else { /* by default, new_local_type is seqdense */ 5179 new_local_type = MATSEQDENSE; 5180 bs = 1; 5181 } 5182 5183 /* create MATIS object if needed */ 5184 if (reuse == MAT_INITIAL_MATRIX) { 5185 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5186 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5187 } else { 5188 /* it also destroys the local matrices */ 5189 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5190 } 5191 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5192 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5193 5194 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5195 5196 /* Global to local map of received indices */ 5197 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5198 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5199 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5200 5201 /* restore attributes -> type of incoming data and its size */ 5202 buf_size_idxs = 0; 5203 for (i=0;i<n_recvs;i++) { 5204 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5205 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5206 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5207 } 5208 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5209 5210 /* set preallocation */ 5211 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5212 if (!newisdense) { 5213 PetscInt *new_local_nnz=0; 5214 5215 ptr_vals = recv_buffer_vals; 5216 ptr_idxs = recv_buffer_idxs_local; 5217 if (n_recvs) { 5218 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5219 } 5220 for (i=0;i<n_recvs;i++) { 5221 PetscInt j; 5222 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5223 for (j=0;j<*(ptr_idxs+1);j++) { 5224 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5225 } 5226 } else { 5227 /* TODO */ 5228 } 5229 ptr_idxs += olengths_idxs[i]; 5230 } 5231 if (new_local_nnz) { 5232 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5233 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5234 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5235 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5236 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5237 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5238 } else { 5239 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5240 } 5241 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5242 } else { 5243 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5244 } 5245 5246 /* set values */ 5247 ptr_vals = recv_buffer_vals; 5248 ptr_idxs = recv_buffer_idxs_local; 5249 for (i=0;i<n_recvs;i++) { 5250 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5251 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5252 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5253 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5254 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5255 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5256 } else { 5257 /* TODO */ 5258 } 5259 ptr_idxs += olengths_idxs[i]; 5260 ptr_vals += olengths_vals[i]; 5261 } 5262 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5263 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5264 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5265 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5266 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5267 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5268 5269 #if 0 5270 if (!restrict_comm) { /* check */ 5271 Vec lvec,rvec; 5272 PetscReal infty_error; 5273 5274 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5275 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5276 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5277 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5278 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5279 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5280 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5281 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5282 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5283 } 5284 #endif 5285 5286 /* assemble new additional is (if any) */ 5287 if (nis) { 5288 PetscInt **temp_idxs,*count_is,j,psum; 5289 5290 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5291 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5292 ptr_idxs = recv_buffer_idxs_is; 5293 psum = 0; 5294 for (i=0;i<n_recvs;i++) { 5295 for (j=0;j<nis;j++) { 5296 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5297 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5298 psum += plen; 5299 ptr_idxs += plen+1; /* shift pointer to received data */ 5300 } 5301 } 5302 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5303 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5304 for (i=1;i<nis;i++) { 5305 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5306 } 5307 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5308 ptr_idxs = recv_buffer_idxs_is; 5309 for (i=0;i<n_recvs;i++) { 5310 for (j=0;j<nis;j++) { 5311 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5312 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5313 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5314 ptr_idxs += plen+1; /* shift pointer to received data */ 5315 } 5316 } 5317 for (i=0;i<nis;i++) { 5318 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5319 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5320 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5321 } 5322 ierr = PetscFree(count_is);CHKERRQ(ierr); 5323 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5324 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5325 } 5326 /* free workspace */ 5327 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5328 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5329 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5330 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5331 if (isdense) { 5332 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5333 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5334 } else { 5335 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5336 } 5337 if (nis) { 5338 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5339 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5340 } 5341 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5342 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5343 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5344 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5345 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5346 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5347 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5348 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5349 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5350 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5351 ierr = PetscFree(onodes);CHKERRQ(ierr); 5352 if (nis) { 5353 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5354 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5355 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5356 } 5357 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5358 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5359 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5360 for (i=0;i<nis;i++) { 5361 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5362 } 5363 *mat_n = NULL; 5364 } 5365 PetscFunctionReturn(0); 5366 } 5367 5368 /* temporary hack into ksp private data structure */ 5369 #include <petsc/private/kspimpl.h> 5370 5371 #undef __FUNCT__ 5372 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5373 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5374 { 5375 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5376 PC_IS *pcis = (PC_IS*)pc->data; 5377 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5378 MatNullSpace CoarseNullSpace=NULL; 5379 ISLocalToGlobalMapping coarse_islg; 5380 IS coarse_is,*isarray; 5381 PetscInt i,im_active=-1,active_procs=-1; 5382 PetscInt nis,nisdofs,nisneu,nisvert; 5383 PC pc_temp; 5384 PCType coarse_pc_type; 5385 KSPType coarse_ksp_type; 5386 PetscBool multilevel_requested,multilevel_allowed; 5387 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5388 Mat t_coarse_mat_is; 5389 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5390 PetscMPIInt all_procs; 5391 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5392 PetscBool compute_vecs = PETSC_FALSE; 5393 PetscScalar *array; 5394 PetscErrorCode ierr; 5395 5396 PetscFunctionBegin; 5397 /* Assign global numbering to coarse dofs */ 5398 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5399 PetscInt ocoarse_size; 5400 compute_vecs = PETSC_TRUE; 5401 ocoarse_size = pcbddc->coarse_size; 5402 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5403 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5404 /* see if we can avoid some work */ 5405 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5406 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5407 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5408 PC pc; 5409 PetscBool isbddc; 5410 5411 /* temporary workaround since PCBDDC does not have a reset method so far */ 5412 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5413 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5414 if (isbddc) { 5415 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5416 } else { 5417 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5418 } 5419 coarse_reuse = PETSC_FALSE; 5420 } else { /* we can safely reuse already computed coarse matrix */ 5421 coarse_reuse = PETSC_TRUE; 5422 } 5423 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5424 coarse_reuse = PETSC_FALSE; 5425 } 5426 /* reset any subassembling information */ 5427 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5428 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5429 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5430 coarse_reuse = PETSC_TRUE; 5431 } 5432 5433 /* count "active" (i.e. with positive local size) and "void" processes */ 5434 im_active = !!(pcis->n); 5435 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5436 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5437 void_procs = all_procs-active_procs; 5438 csin_type_simple = PETSC_TRUE; 5439 redist = PETSC_FALSE; 5440 if (pcbddc->current_level && void_procs) { 5441 csin_ml = PETSC_TRUE; 5442 ncoarse_ml = void_procs; 5443 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5444 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5445 csin_ds = PETSC_TRUE; 5446 ncoarse_ds = pcbddc->redistribute_coarse; 5447 redist = PETSC_TRUE; 5448 } else { 5449 csin_ds = PETSC_TRUE; 5450 ncoarse_ds = active_procs; 5451 redist = PETSC_TRUE; 5452 } 5453 } else { 5454 csin_ml = PETSC_FALSE; 5455 ncoarse_ml = all_procs; 5456 if (void_procs) { 5457 csin_ds = PETSC_TRUE; 5458 ncoarse_ds = void_procs; 5459 csin_type_simple = PETSC_FALSE; 5460 } else { 5461 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5462 csin_ds = PETSC_TRUE; 5463 ncoarse_ds = pcbddc->redistribute_coarse; 5464 redist = PETSC_TRUE; 5465 } else { 5466 csin_ds = PETSC_FALSE; 5467 ncoarse_ds = all_procs; 5468 } 5469 } 5470 } 5471 5472 /* 5473 test if we can go multilevel: three conditions must be satisfied: 5474 - we have not exceeded the number of levels requested 5475 - we can actually subassemble the active processes 5476 - we can find a suitable number of MPI processes where we can place the subassembled problem 5477 */ 5478 multilevel_allowed = PETSC_FALSE; 5479 multilevel_requested = PETSC_FALSE; 5480 if (pcbddc->current_level < pcbddc->max_levels) { 5481 multilevel_requested = PETSC_TRUE; 5482 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5483 multilevel_allowed = PETSC_FALSE; 5484 } else { 5485 multilevel_allowed = PETSC_TRUE; 5486 } 5487 } 5488 /* determine number of process partecipating to coarse solver */ 5489 if (multilevel_allowed) { 5490 ncoarse = ncoarse_ml; 5491 csin = csin_ml; 5492 redist = PETSC_FALSE; 5493 } else { 5494 ncoarse = ncoarse_ds; 5495 csin = csin_ds; 5496 } 5497 5498 /* creates temporary l2gmap and IS for coarse indexes */ 5499 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5500 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5501 5502 /* creates temporary MATIS object for coarse matrix */ 5503 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5504 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5505 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5506 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5507 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5508 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5509 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5510 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5511 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5512 5513 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5514 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5515 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5516 const PetscInt *idxs; 5517 ISLocalToGlobalMapping tmap; 5518 5519 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5520 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5521 /* allocate space for temporary storage */ 5522 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5523 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5524 /* allocate for IS array */ 5525 nisdofs = pcbddc->n_ISForDofsLocal; 5526 nisneu = !!pcbddc->NeumannBoundariesLocal; 5527 nisvert = 0; 5528 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5529 nisvert = 1; 5530 } 5531 nis = nisdofs + nisneu + nisvert; 5532 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5533 /* dofs splitting */ 5534 for (i=0;i<nisdofs;i++) { 5535 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5538 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5540 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5541 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5542 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5543 } 5544 /* neumann boundaries */ 5545 if (pcbddc->NeumannBoundariesLocal) { 5546 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5547 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5548 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5549 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5550 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5551 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5552 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5553 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5554 } 5555 /* primal vertices (benign) */ 5556 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5557 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5558 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5559 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5560 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5561 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5563 } 5564 /* free memory */ 5565 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5566 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5567 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5568 } else { 5569 nis = 0; 5570 nisdofs = 0; 5571 nisneu = 0; 5572 nisvert = 0; 5573 isarray = NULL; 5574 } 5575 /* destroy no longer needed map */ 5576 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5577 5578 /* restrict on coarse candidates (if needed) */ 5579 coarse_mat_is = NULL; 5580 if (csin) { 5581 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5582 if (redist) { 5583 PetscMPIInt rank; 5584 PetscInt spc,n_spc_p1,dest[1],destsize; 5585 5586 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5587 spc = active_procs/ncoarse; 5588 n_spc_p1 = active_procs%ncoarse; 5589 if (im_active) { 5590 destsize = 1; 5591 if (rank > n_spc_p1*(spc+1)-1) { 5592 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5593 } else { 5594 dest[0] = rank/(spc+1); 5595 } 5596 } else { 5597 destsize = 0; 5598 } 5599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5600 } else if (csin_type_simple) { 5601 PetscMPIInt rank; 5602 PetscInt issize,isidx; 5603 5604 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5605 if (im_active) { 5606 issize = 1; 5607 isidx = (PetscInt)rank; 5608 } else { 5609 issize = 0; 5610 isidx = -1; 5611 } 5612 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5613 } else { /* get a suitable subassembling pattern from MATIS code */ 5614 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5615 } 5616 5617 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5618 if (!redist || ncoarse <= void_procs) { 5619 PetscInt ncoarse_cand,tissize,*nisindices; 5620 PetscInt *coarse_candidates; 5621 const PetscInt* tisindices; 5622 5623 /* get coarse candidates' ranks in pc communicator */ 5624 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5625 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5626 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5627 if (!coarse_candidates[i]) { 5628 coarse_candidates[ncoarse_cand++]=i; 5629 } 5630 } 5631 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5632 5633 5634 if (pcbddc->dbg_flag) { 5635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5637 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5639 for (i=0;i<ncoarse_cand;i++) { 5640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5641 } 5642 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5643 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5644 } 5645 /* shift the pattern on coarse candidates */ 5646 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5647 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5648 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5649 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5650 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5651 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5652 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5653 } 5654 if (pcbddc->dbg_flag) { 5655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5657 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5659 } 5660 } 5661 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5662 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5663 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5664 } else { /* this is the last level, so use just receiving processes in subcomm */ 5665 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5666 } 5667 } else { 5668 if (pcbddc->dbg_flag) { 5669 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5670 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5672 } 5673 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5674 coarse_mat_is = t_coarse_mat_is; 5675 } 5676 5677 /* create local to global scatters for coarse problem */ 5678 if (compute_vecs) { 5679 PetscInt lrows; 5680 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5681 if (coarse_mat_is) { 5682 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5683 } else { 5684 lrows = 0; 5685 } 5686 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5687 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5688 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5689 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5690 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5691 } 5692 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5693 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5694 5695 /* set defaults for coarse KSP and PC */ 5696 if (multilevel_allowed) { 5697 coarse_ksp_type = KSPRICHARDSON; 5698 coarse_pc_type = PCBDDC; 5699 } else { 5700 coarse_ksp_type = KSPPREONLY; 5701 coarse_pc_type = PCREDUNDANT; 5702 } 5703 5704 /* print some info if requested */ 5705 if (pcbddc->dbg_flag) { 5706 if (!multilevel_allowed) { 5707 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5708 if (multilevel_requested) { 5709 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5710 } else if (pcbddc->max_levels) { 5711 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5712 } 5713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5714 } 5715 } 5716 5717 /* create the coarse KSP object only once with defaults */ 5718 if (coarse_mat_is) { 5719 MatReuse coarse_mat_reuse; 5720 PetscViewer dbg_viewer = NULL; 5721 if (pcbddc->dbg_flag) { 5722 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5723 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5724 } 5725 if (!pcbddc->coarse_ksp) { 5726 char prefix[256],str_level[16]; 5727 size_t len; 5728 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5729 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5730 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5731 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5732 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5733 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5734 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5735 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5736 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5737 /* prefix */ 5738 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5739 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5740 if (!pcbddc->current_level) { 5741 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5742 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5743 } else { 5744 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5745 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5746 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5747 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5748 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5749 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5750 } 5751 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5752 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5753 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5754 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5755 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5756 /* allow user customization */ 5757 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5758 } 5759 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5760 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5761 if (nisdofs) { 5762 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5763 for (i=0;i<nisdofs;i++) { 5764 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5765 } 5766 } 5767 if (nisneu) { 5768 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5769 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5770 } 5771 if (nisvert) { 5772 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5773 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5774 } 5775 5776 /* get some info after set from options */ 5777 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5778 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5779 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5780 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5781 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5782 isbddc = PETSC_FALSE; 5783 } 5784 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5785 if (isredundant) { 5786 KSP inner_ksp; 5787 PC inner_pc; 5788 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5789 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5790 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5791 } 5792 5793 /* assemble coarse matrix */ 5794 if (coarse_reuse) { 5795 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5796 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5797 coarse_mat_reuse = MAT_REUSE_MATRIX; 5798 } else { 5799 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5800 } 5801 if (isbddc || isnn) { 5802 if (isbddc) { /* currently there are no APIs for these options */ 5803 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5804 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5805 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5806 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5807 if (pcbddc_coarse->benign_compute_nonetflux) { 5808 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5809 } 5810 } 5811 if (pcbddc->coarsening_ratio > 1) { 5812 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5813 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5814 if (pcbddc->dbg_flag) { 5815 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5816 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5817 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5818 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5819 } 5820 } 5821 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5822 } else { 5823 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5824 coarse_mat = coarse_mat_is; 5825 } 5826 } else { 5827 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5828 } 5829 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5830 5831 /* propagate symmetry info of coarse matrix */ 5832 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5833 if (pc->pmat->symmetric_set) { 5834 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5835 } 5836 if (pc->pmat->hermitian_set) { 5837 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5838 } 5839 if (pc->pmat->spd_set) { 5840 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5841 } 5842 /* set operators */ 5843 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5844 if (pcbddc->dbg_flag) { 5845 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5846 } 5847 } else { /* processes non partecipating to coarse solver (if any) */ 5848 coarse_mat = 0; 5849 } 5850 ierr = PetscFree(isarray);CHKERRQ(ierr); 5851 #if 0 5852 { 5853 PetscViewer viewer; 5854 char filename[256]; 5855 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5856 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5857 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5858 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5859 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5860 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5861 } 5862 #endif 5863 5864 /* Compute coarse null space (special handling by BDDC only) */ 5865 #if 0 5866 if (pcbddc->NullSpace) { 5867 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5868 } 5869 #endif 5870 /* hack */ 5871 if (pcbddc->coarse_ksp) { 5872 Vec crhs,csol; 5873 5874 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5875 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5876 if (!csol) { 5877 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5878 } 5879 if (!crhs) { 5880 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5881 } 5882 } 5883 5884 /* compute null space for coarse solver if the benign trick has been requested */ 5885 if (pcbddc->benign_null) { 5886 5887 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5888 for (i=0;i<pcbddc->benign_n;i++) { 5889 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5890 } 5891 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5892 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5893 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5894 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5895 if (coarse_mat) { 5896 Vec nullv; 5897 PetscScalar *array,*array2; 5898 PetscInt nl; 5899 5900 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5901 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5902 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5903 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5904 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5905 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5906 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5907 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5908 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5909 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5910 } 5911 } 5912 5913 if (pcbddc->coarse_ksp) { 5914 PetscBool ispreonly; 5915 5916 if (CoarseNullSpace) { 5917 PetscBool isnull; 5918 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5919 if (0) { 5920 if (isbddc && !pcbddc->benign_saddle_point) { 5921 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5922 } else { 5923 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5924 } 5925 } else { 5926 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5927 } 5928 } 5929 /* setup coarse ksp */ 5930 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5931 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5932 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5933 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5934 KSP check_ksp; 5935 KSPType check_ksp_type; 5936 PC check_pc; 5937 Vec check_vec,coarse_vec; 5938 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5939 PetscInt its; 5940 PetscBool compute_eigs; 5941 PetscReal *eigs_r,*eigs_c; 5942 PetscInt neigs; 5943 const char *prefix; 5944 5945 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5946 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5947 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5948 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5949 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5950 if (ispreonly) { 5951 check_ksp_type = KSPPREONLY; 5952 compute_eigs = PETSC_FALSE; 5953 } else { 5954 check_ksp_type = KSPGMRES; 5955 compute_eigs = PETSC_TRUE; 5956 } 5957 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5958 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5959 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5960 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5961 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5962 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5963 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5964 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5965 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5966 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5967 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5968 /* create random vec */ 5969 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5970 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5971 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5972 if (CoarseNullSpace) { 5973 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5974 } 5975 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5976 /* solve coarse problem */ 5977 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5978 if (CoarseNullSpace) { 5979 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5980 } 5981 /* set eigenvalue estimation if preonly has not been requested */ 5982 if (compute_eigs) { 5983 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5984 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5985 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5986 lambda_max = eigs_r[neigs-1]; 5987 lambda_min = eigs_r[0]; 5988 if (pcbddc->use_coarse_estimates) { 5989 if (lambda_max>lambda_min) { 5990 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5991 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5992 } 5993 } 5994 } 5995 5996 /* check coarse problem residual error */ 5997 if (pcbddc->dbg_flag) { 5998 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5999 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6000 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6001 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6002 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6003 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6004 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6005 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6006 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6007 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6008 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6009 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6010 if (CoarseNullSpace) { 6011 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6012 } 6013 if (compute_eigs) { 6014 PetscReal lambda_max_s,lambda_min_s; 6015 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6016 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6017 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6018 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6019 for (i=0;i<neigs;i++) { 6020 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6021 } 6022 } 6023 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6024 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6025 } 6026 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6027 if (compute_eigs) { 6028 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6029 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6030 } 6031 } 6032 } 6033 /* print additional info */ 6034 if (pcbddc->dbg_flag) { 6035 /* waits until all processes reaches this point */ 6036 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6039 } 6040 6041 /* free memory */ 6042 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6043 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6044 PetscFunctionReturn(0); 6045 } 6046 6047 #undef __FUNCT__ 6048 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6049 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6050 { 6051 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6052 PC_IS* pcis = (PC_IS*)pc->data; 6053 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6054 IS subset,subset_mult,subset_n; 6055 PetscInt local_size,coarse_size=0; 6056 PetscInt *local_primal_indices=NULL; 6057 const PetscInt *t_local_primal_indices; 6058 PetscErrorCode ierr; 6059 6060 PetscFunctionBegin; 6061 /* Compute global number of coarse dofs */ 6062 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6063 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6064 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6065 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6066 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6067 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6068 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6069 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6070 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6071 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6072 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6073 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6074 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6075 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6076 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6077 6078 /* check numbering */ 6079 if (pcbddc->dbg_flag) { 6080 PetscScalar coarsesum,*array,*array2; 6081 PetscInt i; 6082 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6083 6084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6087 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6088 /* counter */ 6089 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6090 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6091 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6092 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6093 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6094 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6095 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6096 for (i=0;i<pcbddc->local_primal_size;i++) { 6097 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6098 } 6099 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6100 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6101 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6102 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6103 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6104 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6105 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6106 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6107 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6108 for (i=0;i<pcis->n;i++) { 6109 if (array[i] != 0.0 && array[i] != array2[i]) { 6110 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6111 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6112 set_error = PETSC_TRUE; 6113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6114 } 6115 } 6116 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6117 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6119 for (i=0;i<pcis->n;i++) { 6120 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6121 } 6122 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6123 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6124 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6125 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6126 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6128 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6129 PetscInt *gidxs; 6130 6131 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6132 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6134 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6136 for (i=0;i<pcbddc->local_primal_size;i++) { 6137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6138 } 6139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6140 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6141 } 6142 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6143 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6144 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6145 } 6146 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6147 /* get back data */ 6148 *coarse_size_n = coarse_size; 6149 *local_primal_indices_n = local_primal_indices; 6150 PetscFunctionReturn(0); 6151 } 6152 6153 #undef __FUNCT__ 6154 #define __FUNCT__ "PCBDDCGlobalToLocal" 6155 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6156 { 6157 IS localis_t; 6158 PetscInt i,lsize,*idxs,n; 6159 PetscScalar *vals; 6160 PetscErrorCode ierr; 6161 6162 PetscFunctionBegin; 6163 /* get indices in local ordering exploiting local to global map */ 6164 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6165 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6166 for (i=0;i<lsize;i++) vals[i] = 1.0; 6167 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6168 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6169 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6170 if (idxs) { /* multilevel guard */ 6171 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6172 } 6173 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6174 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6175 ierr = PetscFree(vals);CHKERRQ(ierr); 6176 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6177 /* now compute set in local ordering */ 6178 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6179 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6180 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6181 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6182 for (i=0,lsize=0;i<n;i++) { 6183 if (PetscRealPart(vals[i]) > 0.5) { 6184 lsize++; 6185 } 6186 } 6187 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6188 for (i=0,lsize=0;i<n;i++) { 6189 if (PetscRealPart(vals[i]) > 0.5) { 6190 idxs[lsize++] = i; 6191 } 6192 } 6193 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6195 *localis = localis_t; 6196 PetscFunctionReturn(0); 6197 } 6198 6199 #undef __FUNCT__ 6200 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6201 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6202 { 6203 PC_IS *pcis=(PC_IS*)pc->data; 6204 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6205 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6206 Mat S_j; 6207 PetscInt *used_xadj,*used_adjncy; 6208 PetscBool free_used_adj; 6209 PetscErrorCode ierr; 6210 6211 PetscFunctionBegin; 6212 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6213 free_used_adj = PETSC_FALSE; 6214 if (pcbddc->sub_schurs_layers == -1) { 6215 used_xadj = NULL; 6216 used_adjncy = NULL; 6217 } else { 6218 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6219 used_xadj = pcbddc->mat_graph->xadj; 6220 used_adjncy = pcbddc->mat_graph->adjncy; 6221 } else if (pcbddc->computed_rowadj) { 6222 used_xadj = pcbddc->mat_graph->xadj; 6223 used_adjncy = pcbddc->mat_graph->adjncy; 6224 } else { 6225 PetscBool flg_row=PETSC_FALSE; 6226 const PetscInt *xadj,*adjncy; 6227 PetscInt nvtxs; 6228 6229 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6230 if (flg_row) { 6231 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6232 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6233 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6234 free_used_adj = PETSC_TRUE; 6235 } else { 6236 pcbddc->sub_schurs_layers = -1; 6237 used_xadj = NULL; 6238 used_adjncy = NULL; 6239 } 6240 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6241 } 6242 } 6243 6244 /* setup sub_schurs data */ 6245 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6246 if (!sub_schurs->schur_explicit) { 6247 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6248 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6249 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6250 } else { 6251 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6252 PetscBool isseqaij,need_change = PETSC_FALSE;; 6253 PetscInt benign_n; 6254 Mat change = NULL; 6255 Vec scaling = NULL; 6256 IS change_primal = NULL; 6257 6258 if (!pcbddc->use_vertices && reuse_solvers) { 6259 PetscInt n_vertices; 6260 6261 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6262 reuse_solvers = (PetscBool)!n_vertices; 6263 } 6264 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6265 if (!isseqaij) { 6266 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6267 if (matis->A == pcbddc->local_mat) { 6268 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6269 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6270 } else { 6271 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6272 } 6273 } 6274 if (!pcbddc->benign_change_explicit) { 6275 benign_n = pcbddc->benign_n; 6276 } else { 6277 benign_n = 0; 6278 } 6279 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6280 We need a global reduction to avoid possible deadlocks. 6281 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6282 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6283 PetscBool have_loc_change = !!(sub_schurs->change); 6284 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6285 need_change = !need_change; 6286 } 6287 /* If the user defines additional constraints, we import them here. 6288 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6289 if (need_change) { 6290 PC_IS *pcisf; 6291 PC_BDDC *pcbddcf; 6292 PC pcf; 6293 6294 if (pcbddc->sub_schurs_rebuild) { 6295 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6296 } 6297 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6298 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6299 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6300 /* hacks */ 6301 pcisf = (PC_IS*)pcf->data; 6302 pcisf->is_B_local = pcis->is_B_local; 6303 pcisf->vec1_N = pcis->vec1_N; 6304 pcisf->BtoNmap = pcis->BtoNmap; 6305 pcisf->n = pcis->n; 6306 pcisf->n_B = pcis->n_B; 6307 pcbddcf = (PC_BDDC*)pcf->data; 6308 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6309 pcbddcf->mat_graph = pcbddc->mat_graph; 6310 pcbddcf->use_faces = PETSC_TRUE; 6311 pcbddcf->use_change_of_basis = PETSC_TRUE; 6312 pcbddcf->use_change_on_faces = PETSC_TRUE; 6313 pcbddcf->use_qr_single = PETSC_TRUE; 6314 pcbddcf->fake_change = PETSC_TRUE; 6315 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6316 /* store information on primal vertices and change of basis (in local numbering) */ 6317 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6318 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6319 change = pcbddcf->ConstraintMatrix; 6320 pcbddcf->ConstraintMatrix = NULL; 6321 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6322 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6323 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6324 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6325 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6326 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6327 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6328 pcf->ops->destroy = NULL; 6329 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6330 } 6331 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6332 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6333 ierr = MatDestroy(&change);CHKERRQ(ierr); 6334 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6335 } 6336 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6337 6338 /* free adjacency */ 6339 if (free_used_adj) { 6340 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6341 } 6342 PetscFunctionReturn(0); 6343 } 6344 6345 #undef __FUNCT__ 6346 #define __FUNCT__ "PCBDDCInitSubSchurs" 6347 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6348 { 6349 PC_IS *pcis=(PC_IS*)pc->data; 6350 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6351 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6352 PCBDDCGraph graph; 6353 PetscErrorCode ierr; 6354 6355 PetscFunctionBegin; 6356 /* attach interface graph for determining subsets */ 6357 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6358 IS verticesIS,verticescomm; 6359 PetscInt vsize,*idxs; 6360 6361 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6362 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6363 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6364 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6365 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6366 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6367 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6368 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6369 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6370 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6371 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6372 /* 6373 if (pcbddc->dbg_flag) { 6374 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6375 } 6376 */ 6377 } else { 6378 graph = pcbddc->mat_graph; 6379 } 6380 6381 /* sub_schurs init */ 6382 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6383 6384 /* free graph struct */ 6385 if (pcbddc->sub_schurs_rebuild) { 6386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6387 } 6388 PetscFunctionReturn(0); 6389 } 6390 6391 #undef __FUNCT__ 6392 #define __FUNCT__ "PCBDDCCheckOperator" 6393 PetscErrorCode PCBDDCCheckOperator(PC pc) 6394 { 6395 PC_IS *pcis=(PC_IS*)pc->data; 6396 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6397 PetscErrorCode ierr; 6398 6399 PetscFunctionBegin; 6400 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6401 IS zerodiag = NULL; 6402 Mat S_j,B0_B=NULL; 6403 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6404 PetscScalar *p0_check,*array,*array2; 6405 PetscReal norm; 6406 PetscInt i; 6407 6408 /* B0 and B0_B */ 6409 if (zerodiag) { 6410 IS dummy; 6411 6412 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6413 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6414 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6415 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6416 } 6417 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6418 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6419 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6420 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6421 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6422 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6423 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6424 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6425 /* S_j */ 6426 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6427 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6428 6429 /* mimic vector in \widetilde{W}_\Gamma */ 6430 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6431 /* continuous in primal space */ 6432 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6433 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6434 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6435 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6436 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6437 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6438 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6439 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6440 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6441 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6442 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6443 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6444 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6445 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6446 6447 /* assemble rhs for coarse problem */ 6448 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6449 /* local with Schur */ 6450 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6451 if (zerodiag) { 6452 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6453 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6454 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6455 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6456 } 6457 /* sum on primal nodes the local contributions */ 6458 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6459 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6460 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6461 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6462 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6463 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6464 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6465 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6466 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6467 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6468 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6469 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6470 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6471 /* scale primal nodes (BDDC sums contibutions) */ 6472 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6473 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6474 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6475 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6476 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6477 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6478 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6479 /* global: \widetilde{B0}_B w_\Gamma */ 6480 if (zerodiag) { 6481 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6482 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6483 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6484 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6485 } 6486 /* BDDC */ 6487 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6488 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6489 6490 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6491 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6492 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6493 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6494 for (i=0;i<pcbddc->benign_n;i++) { 6495 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6496 } 6497 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6498 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6499 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6500 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6501 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6502 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6503 } 6504 PetscFunctionReturn(0); 6505 } 6506