1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 recompute_zerodiag = PETSC_FALSE; 587 has_null_pressures = PETSC_FALSE; 588 have_null = PETSC_FALSE; 589 } 590 591 /* final check for null pressures */ 592 if (zerodiag && pressures) { 593 PetscInt nz,np; 594 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 595 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 596 if (nz != np) have_null = PETSC_FALSE; 597 } 598 599 if (recompute_zerodiag) { 600 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 601 if (pcbddc->benign_n == 1) { 602 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 603 zerodiag = zerodiag_subs[0]; 604 } else { 605 PetscInt i,nzn,*new_idxs; 606 607 nzn = 0; 608 for (i=0;i<pcbddc->benign_n;i++) { 609 PetscInt ns; 610 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 611 nzn += ns; 612 } 613 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns,*idxs; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 620 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 621 nzn += ns; 622 } 623 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 625 } 626 have_null = PETSC_FALSE; 627 } 628 629 /* no-net-flux */ 630 if (pcbddc->benign_compute_nonetflux) { 631 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 632 MatNullSpace near_null_space; 633 ISLocalToGlobalMapping rmap; 634 Vec quad_vec; 635 PetscScalar *pvals; 636 PetscInt i,np,*dummyins; 637 638 /* create vector to hold quadrature weights */ 639 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 640 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 641 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 642 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 643 644 /* compute B^{(i)T} * 1_p */ 645 np = 0; 646 if (zerodiag) { 647 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 648 } else if (pressures) { 649 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 650 } 651 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 652 for (i=0;i<np;i++) pvals[i] = 1.; 653 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 654 if (np) { 655 const PetscInt *pidxs; 656 657 if (zerodiag) { 658 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 659 } else if (pressures) { 660 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 661 } 662 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 663 if (zerodiag) { 664 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 } 669 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 670 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 672 ierr = PetscFree(pvals);CHKERRQ(ierr); 673 674 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 675 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 676 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 677 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 678 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 679 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 680 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 681 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 682 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 683 for (i=0;i<matis->sf_nleaves;i++) { 684 if (PetscGlobalRank != matis->sf_leafdata[i]) { 685 pvals[i] = 0.; 686 } 687 } 688 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 689 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 690 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 692 ierr = PetscFree(dummyins);CHKERRQ(ierr); 693 694 /* assembly quadrature vec and attach near null space to pmat */ 695 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 696 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 697 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 699 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 700 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 701 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 702 } 703 704 /* change of basis and p0 dofs */ 705 if (has_null_pressures) { 706 IS zerodiagc; 707 const PetscInt *idxs,*idxsc; 708 PetscInt i,s,*nnz; 709 710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 713 /* local change of basis for pressures */ 714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 720 for (i=0;i<pcbddc->benign_n;i++) { 721 PetscInt nzs,j; 722 723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 728 } 729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 730 ierr = PetscFree(nnz);CHKERRQ(ierr); 731 /* set identity on velocities */ 732 for (i=0;i<n-nz;i++) { 733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 734 } 735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 739 /* set change on pressures */ 740 for (s=0;s<pcbddc->benign_n;s++) { 741 PetscScalar *array; 742 PetscInt nzs; 743 744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 746 for (i=0;i<nzs-1;i++) { 747 PetscScalar vals[2]; 748 PetscInt cols[2]; 749 750 cols[0] = idxs[i]; 751 cols[1] = idxs[nzs-1]; 752 vals[0] = 1.; 753 vals[1] = 1.; 754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 } 756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 757 for (i=0;i<nzs-1;i++) array[i] = -1.; 758 array[nzs-1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 760 /* store local idxs for p0 */ 761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 763 ierr = PetscFree(array);CHKERRQ(ierr); 764 } 765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* project if needed */ 768 if (pcbddc->benign_change_explicit) { 769 Mat M; 770 771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 774 ierr = MatDestroy(&M);CHKERRQ(ierr); 775 } 776 /* store global idxs for p0 */ 777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 778 } 779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 781 782 /* determines if the coarse solver will be singular or not */ 783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 784 *zerodiaglocal = zerodiag; 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 791 { 792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (!pcbddc->benign_sf) { 797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 798 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 799 } 800 if (get) { /* use SF to get values */ 801 PetscScalar *array; 802 803 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 804 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 805 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 807 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 808 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 817 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 818 { 819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 /* TODO: add error checking 824 - avoid nested pop (or push) calls. 825 - cannot push before pop. 826 - cannot call this if pcbddc->local_mat is NULL 827 */ 828 if (!pcbddc->benign_n) { 829 PetscFunctionReturn(0); 830 } 831 if (pop) { 832 if (pcbddc->benign_change_explicit) { 833 IS is_p0; 834 MatReuse reuse; 835 836 /* extract B_0 */ 837 reuse = MAT_INITIAL_MATRIX; 838 if (pcbddc->benign_B0) { 839 reuse = MAT_REUSE_MATRIX; 840 } 841 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 843 /* remove rows and cols from local problem */ 844 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 845 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 846 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 848 } else { 849 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 850 PetscScalar *vals; 851 PetscInt i,n,*idxs_ins; 852 853 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 854 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 855 if (!pcbddc->benign_B0) { 856 PetscInt *nnz; 857 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 858 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 859 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 860 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 861 for (i=0;i<pcbddc->benign_n;i++) { 862 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 863 nnz[i] = n - nnz[i]; 864 } 865 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 866 ierr = PetscFree(nnz);CHKERRQ(ierr); 867 } 868 869 for (i=0;i<pcbddc->benign_n;i++) { 870 PetscScalar *array; 871 PetscInt *idxs,j,nz,cum; 872 873 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 875 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 876 for (j=0;j<nz;j++) vals[j] = 1.; 877 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 878 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 879 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 880 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 881 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 883 cum = 0; 884 for (j=0;j<n;j++) { 885 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 886 vals[cum] = array[j]; 887 idxs_ins[cum] = j; 888 cum++; 889 } 890 } 891 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 892 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 898 } 899 } else { /* push */ 900 if (pcbddc->benign_change_explicit) { 901 PetscInt i; 902 903 for (i=0;i<pcbddc->benign_n;i++) { 904 PetscScalar *B0_vals; 905 PetscInt *B0_cols,B0_ncol; 906 907 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 908 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 909 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 911 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 } else { 916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "PCBDDCAdaptiveSelection" 924 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 925 { 926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 927 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 928 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 929 PetscBLASInt *B_iwork,*B_ifail; 930 PetscScalar *work,lwork; 931 PetscScalar *St,*S,*eigv; 932 PetscScalar *Sarray,*Starray; 933 PetscReal *eigs,thresh; 934 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 935 PetscBool allocated_S_St; 936 #if defined(PETSC_USE_COMPLEX) 937 PetscReal *rwork; 938 #endif 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 943 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 944 945 if (pcbddc->dbg_flag) { 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 949 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 } 951 952 if (pcbddc->dbg_flag) { 953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 954 } 955 956 /* max size of subsets */ 957 mss = 0; 958 for (i=0;i<sub_schurs->n_subs;i++) { 959 PetscInt subset_size; 960 961 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 962 mss = PetscMax(mss,subset_size); 963 } 964 965 /* min/max and threshold */ 966 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 967 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 968 nmax = PetscMax(nmin,nmax); 969 allocated_S_St = PETSC_FALSE; 970 if (nmin) { 971 allocated_S_St = PETSC_TRUE; 972 } 973 974 /* allocate lapack workspace */ 975 cum = cum2 = 0; 976 maxneigs = 0; 977 for (i=0;i<sub_schurs->n_subs;i++) { 978 PetscInt n,subset_size; 979 980 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 981 n = PetscMin(subset_size,nmax); 982 cum += subset_size; 983 cum2 += subset_size*n; 984 maxneigs = PetscMax(maxneigs,n); 985 } 986 if (mss) { 987 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 988 PetscBLASInt B_itype = 1; 989 PetscBLASInt B_N = mss; 990 PetscReal zero = 0.0; 991 PetscReal eps = 0.0; /* dlamch? */ 992 993 B_lwork = -1; 994 S = NULL; 995 St = NULL; 996 eigs = NULL; 997 eigv = NULL; 998 B_iwork = NULL; 999 B_ifail = NULL; 1000 #if defined(PETSC_USE_COMPLEX) 1001 rwork = NULL; 1002 #endif 1003 thresh = 1.0; 1004 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1005 #if defined(PETSC_USE_COMPLEX) 1006 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1007 #else 1008 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1009 #endif 1010 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1012 } else { 1013 /* TODO */ 1014 } 1015 } else { 1016 lwork = 0; 1017 } 1018 1019 nv = 0; 1020 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1021 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1022 } 1023 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1024 if (allocated_S_St) { 1025 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1026 } 1027 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1030 #endif 1031 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1032 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1033 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1034 nv+cum,&pcbddc->adaptive_constraints_idxs, 1035 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1036 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1037 1038 maxneigs = 0; 1039 cum = cumarray = 0; 1040 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1041 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1042 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1043 const PetscInt *idxs; 1044 1045 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1046 for (cum=0;cum<nv;cum++) { 1047 pcbddc->adaptive_constraints_n[cum] = 1; 1048 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1049 pcbddc->adaptive_constraints_data[cum] = 1.0; 1050 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1051 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1052 } 1053 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1054 } 1055 1056 if (mss) { /* multilevel */ 1057 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1058 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1059 } 1060 1061 thresh = pcbddc->adaptive_threshold; 1062 for (i=0;i<sub_schurs->n_subs;i++) { 1063 const PetscInt *idxs; 1064 PetscReal upper,lower; 1065 PetscInt j,subset_size,eigs_start = 0; 1066 PetscBLASInt B_N; 1067 PetscBool same_data = PETSC_FALSE; 1068 1069 if (pcbddc->use_deluxe_scaling) { 1070 upper = PETSC_MAX_REAL; 1071 lower = thresh; 1072 } else { 1073 upper = 1./thresh; 1074 lower = 0.; 1075 } 1076 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1077 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1078 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1079 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1080 if (sub_schurs->is_hermitian) { 1081 PetscInt j,k; 1082 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1083 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1084 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1085 } 1086 for (j=0;j<subset_size;j++) { 1087 for (k=j;k<subset_size;k++) { 1088 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1089 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1090 } 1091 } 1092 } else { 1093 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1095 } 1096 } else { 1097 S = Sarray + cumarray; 1098 St = Starray + cumarray; 1099 } 1100 /* see if we can save some work */ 1101 if (sub_schurs->n_subs == 1) { 1102 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1103 } 1104 1105 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1106 B_neigs = 0; 1107 } else { 1108 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1109 PetscBLASInt B_itype = 1; 1110 PetscBLASInt B_IL, B_IU; 1111 PetscReal eps = -1.0; /* dlamch? */ 1112 PetscInt nmin_s; 1113 1114 if (pcbddc->dbg_flag) { 1115 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1116 } 1117 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 if (thresh > 1.+PETSC_SMALL) { 1120 1121 /* ask for eigenvalues larger than thresh */ 1122 #if defined(PETSC_USE_COMPLEX) 1123 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1124 #else 1125 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1126 #endif 1127 } else { 1128 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1129 B_IL = 1; 1130 #if defined(PETSC_USE_COMPLEX) 1131 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1132 #else 1133 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1134 #endif 1135 } 1136 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1137 if (B_ierr) { 1138 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1139 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1140 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1141 } 1142 1143 if (B_neigs > nmax) { 1144 if (pcbddc->dbg_flag) { 1145 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1146 } 1147 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1148 B_neigs = nmax; 1149 } 1150 1151 nmin_s = PetscMin(nmin,B_N); 1152 if (B_neigs < nmin_s) { 1153 PetscBLASInt B_neigs2; 1154 1155 if (pcbddc->use_deluxe_scaling) { 1156 B_IL = B_N - nmin_s + 1; 1157 B_IU = B_N - B_neigs; 1158 } else { 1159 B_IL = B_neigs + 1; 1160 B_IU = nmin_s; 1161 } 1162 if (pcbddc->dbg_flag) { 1163 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1164 } 1165 if (sub_schurs->is_hermitian) { 1166 PetscInt j; 1167 for (j=0;j<subset_size;j++) { 1168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1169 } 1170 for (j=0;j<subset_size;j++) { 1171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1172 } 1173 } else { 1174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1176 } 1177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1178 #if defined(PETSC_USE_COMPLEX) 1179 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1180 #else 1181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1182 #endif 1183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1184 B_neigs += B_neigs2; 1185 } 1186 if (B_ierr) { 1187 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1188 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1189 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1190 } 1191 if (pcbddc->dbg_flag) { 1192 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1193 for (j=0;j<B_neigs;j++) { 1194 if (eigs[j] == 0.0) { 1195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1196 } else { 1197 if (pcbddc->use_deluxe_scaling) { 1198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1199 } else { 1200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1201 } 1202 } 1203 } 1204 } 1205 } else { 1206 /* TODO */ 1207 } 1208 } 1209 /* change the basis back to the original one */ 1210 if (sub_schurs->change) { 1211 Mat change,phi,phit; 1212 1213 if (pcbddc->dbg_flag > 1) { 1214 PetscInt ii; 1215 for (ii=0;ii<B_neigs;ii++) { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1217 for (j=0;j<B_N;j++) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1219 } 1220 } 1221 } 1222 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1223 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1224 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1225 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1226 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1227 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1228 } 1229 maxneigs = PetscMax(B_neigs,maxneigs); 1230 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1231 if (B_neigs) { 1232 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1233 1234 if (pcbddc->dbg_flag > 1) { 1235 PetscInt ii; 1236 for (ii=0;ii<B_neigs;ii++) { 1237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1238 for (j=0;j<B_N;j++) { 1239 #if defined(PETSC_USE_COMPLEX) 1240 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1241 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1243 #else 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1245 #endif 1246 } 1247 } 1248 } 1249 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1250 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1251 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1252 cum++; 1253 } 1254 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1255 /* shift for next computation */ 1256 cumarray += subset_size*subset_size; 1257 } 1258 if (pcbddc->dbg_flag) { 1259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 } 1261 1262 if (mss) { 1263 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1264 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1265 /* destroy matrices (junk) */ 1266 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1267 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1268 } 1269 if (allocated_S_St) { 1270 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1271 } 1272 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1273 #if defined(PETSC_USE_COMPLEX) 1274 ierr = PetscFree(rwork);CHKERRQ(ierr); 1275 #endif 1276 if (pcbddc->dbg_flag) { 1277 PetscInt maxneigs_r; 1278 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCSetUpSolvers" 1286 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1287 { 1288 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1289 PetscScalar *coarse_submat_vals; 1290 PetscErrorCode ierr; 1291 1292 PetscFunctionBegin; 1293 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1294 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1295 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1296 1297 /* Setup local neumann solver ksp_R */ 1298 /* PCBDDCSetUpLocalScatters should be called first! */ 1299 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1300 1301 /* Change global null space passed in by the user if change of basis has been requested */ 1302 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1303 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1304 } 1305 1306 /* 1307 Setup local correction and local part of coarse basis. 1308 Gives back the dense local part of the coarse matrix in column major ordering 1309 */ 1310 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1311 1312 /* Compute total number of coarse nodes and setup coarse solver */ 1313 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1314 1315 /* free */ 1316 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "PCBDDCResetCustomization" 1322 PetscErrorCode PCBDDCResetCustomization(PC pc) 1323 { 1324 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1329 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1330 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1331 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1332 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1333 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1334 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1335 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1336 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1337 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1338 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1339 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "PCBDDCResetTopography" 1345 PetscErrorCode PCBDDCResetTopography(PC pc) 1346 { 1347 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1348 PetscInt i; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1353 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1354 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1355 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1356 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1357 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1358 for (i=0;i<pcbddc->n_local_subs;i++) { 1359 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1360 } 1361 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1362 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "PCBDDCResetSolvers" 1368 PetscErrorCode PCBDDCResetSolvers(PC pc) 1369 { 1370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1375 if (pcbddc->coarse_phi_B) { 1376 PetscScalar *array; 1377 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1378 ierr = PetscFree(array);CHKERRQ(ierr); 1379 } 1380 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1381 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1382 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1383 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1384 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1385 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1386 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1387 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1388 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1389 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1390 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1391 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1392 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1393 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1394 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1395 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1396 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1397 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1398 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1399 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1400 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1401 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1402 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1403 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1404 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1405 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1406 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1407 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1408 if (pcbddc->benign_zerodiag_subs) { 1409 PetscInt i; 1410 for (i=0;i<pcbddc->benign_n;i++) { 1411 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1412 } 1413 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1414 } 1415 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1421 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1422 { 1423 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1424 PC_IS *pcis = (PC_IS*)pc->data; 1425 VecType impVecType; 1426 PetscInt n_constraints,n_R,old_size; 1427 PetscErrorCode ierr; 1428 1429 PetscFunctionBegin; 1430 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1431 /* get sizes */ 1432 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1433 n_R = pcis->n - pcbddc->n_vertices; 1434 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1435 /* local work vectors (try to avoid unneeded work)*/ 1436 /* R nodes */ 1437 old_size = -1; 1438 if (pcbddc->vec1_R) { 1439 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1440 } 1441 if (n_R != old_size) { 1442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1444 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1445 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1446 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1447 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1448 } 1449 /* local primal dofs */ 1450 old_size = -1; 1451 if (pcbddc->vec1_P) { 1452 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1453 } 1454 if (pcbddc->local_primal_size != old_size) { 1455 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1456 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1457 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1458 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1459 } 1460 /* local explicit constraints */ 1461 old_size = -1; 1462 if (pcbddc->vec1_C) { 1463 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1464 } 1465 if (n_constraints && n_constraints != old_size) { 1466 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1468 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1469 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "PCBDDCSetUpCorrection" 1476 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1477 { 1478 PetscErrorCode ierr; 1479 /* pointers to pcis and pcbddc */ 1480 PC_IS* pcis = (PC_IS*)pc->data; 1481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1482 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1483 /* submatrices of local problem */ 1484 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1485 /* submatrices of local coarse problem */ 1486 Mat S_VV,S_CV,S_VC,S_CC; 1487 /* working matrices */ 1488 Mat C_CR; 1489 /* additional working stuff */ 1490 PC pc_R; 1491 Mat F; 1492 Vec dummy_vec; 1493 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1494 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1495 PetscScalar *work; 1496 PetscInt *idx_V_B; 1497 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1498 PetscInt i,n_R,n_D,n_B; 1499 1500 /* some shortcuts to scalars */ 1501 PetscScalar one=1.0,m_one=-1.0; 1502 1503 PetscFunctionBegin; 1504 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1505 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1506 } 1507 1508 /* Set Non-overlapping dimensions */ 1509 n_vertices = pcbddc->n_vertices; 1510 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1511 n_B = pcis->n_B; 1512 n_D = pcis->n - n_B; 1513 n_R = pcis->n - n_vertices; 1514 1515 /* vertices in boundary numbering */ 1516 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1517 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1518 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1519 1520 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1521 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1522 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1523 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1524 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1525 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1526 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1527 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1528 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1529 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1530 1531 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1532 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1535 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1536 lda_rhs = n_R; 1537 need_benign_correction = PETSC_FALSE; 1538 if (isLU || isILU || isCHOL) { 1539 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1540 } else if (sub_schurs->reuse_solver) { 1541 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1542 MatFactorType type; 1543 1544 F = reuse_solver->F; 1545 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1546 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1547 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1548 need_benign_correction = !!reuse_solver->benign_n; 1549 } else { 1550 F = NULL; 1551 } 1552 1553 /* allocate workspace */ 1554 n = 0; 1555 if (n_constraints) { 1556 n += lda_rhs*n_constraints; 1557 } 1558 if (n_vertices) { 1559 n = PetscMax(2*lda_rhs*n_vertices,n); 1560 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1561 } 1562 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1563 1564 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1565 dummy_vec = NULL; 1566 if (need_benign_correction && lda_rhs != n_R && F) { 1567 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1568 } 1569 1570 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1571 if (n_constraints) { 1572 Mat M1,M2,M3,C_B; 1573 IS is_aux; 1574 PetscScalar *array,*array2; 1575 1576 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1577 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1578 1579 /* Extract constraints on R nodes: C_{CR} */ 1580 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1581 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1582 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1583 1584 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1585 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1586 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1587 for (i=0;i<n_constraints;i++) { 1588 const PetscScalar *row_cmat_values; 1589 const PetscInt *row_cmat_indices; 1590 PetscInt size_of_constraint,j; 1591 1592 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1593 for (j=0;j<size_of_constraint;j++) { 1594 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1595 } 1596 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1597 } 1598 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1599 if (F) { 1600 Mat B; 1601 1602 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1603 if (need_benign_correction) { 1604 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1605 1606 /* rhs is already zero on interior dofs, no need to change the rhs */ 1607 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1608 } 1609 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1610 if (need_benign_correction) { 1611 PetscScalar *marr; 1612 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1613 1614 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1615 if (lda_rhs != n_R) { 1616 for (i=0;i<n_constraints;i++) { 1617 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1618 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1619 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1620 } 1621 } else { 1622 for (i=0;i<n_constraints;i++) { 1623 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1624 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1626 } 1627 } 1628 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1629 } 1630 ierr = MatDestroy(&B);CHKERRQ(ierr); 1631 } else { 1632 PetscScalar *marr; 1633 1634 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1635 for (i=0;i<n_constraints;i++) { 1636 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1637 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1638 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1639 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1640 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1641 } 1642 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1643 } 1644 if (!pcbddc->switch_static) { 1645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1646 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1647 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1648 for (i=0;i<n_constraints;i++) { 1649 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1650 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1651 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1654 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1655 } 1656 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1657 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1658 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1659 } else { 1660 if (lda_rhs != n_R) { 1661 IS dummy; 1662 1663 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1664 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1665 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1666 } else { 1667 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1668 pcbddc->local_auxmat2 = local_auxmat2_R; 1669 } 1670 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1671 } 1672 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1673 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1674 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1675 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1677 if (isCHOL) { 1678 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1679 } else { 1680 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1681 } 1682 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1683 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1684 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1685 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1686 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1687 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1688 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1689 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1690 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1691 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1692 } 1693 1694 /* Get submatrices from subdomain matrix */ 1695 if (n_vertices) { 1696 IS is_aux; 1697 1698 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1699 IS tis; 1700 1701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1702 ierr = ISSort(tis);CHKERRQ(ierr); 1703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1705 } else { 1706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1707 } 1708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1712 } 1713 1714 /* Matrix of coarse basis functions (local) */ 1715 if (pcbddc->coarse_phi_B) { 1716 PetscInt on_B,on_primal,on_D=n_D; 1717 if (pcbddc->coarse_phi_D) { 1718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1719 } 1720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1722 PetscScalar *marray; 1723 1724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1725 ierr = PetscFree(marray);CHKERRQ(ierr); 1726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1730 } 1731 } 1732 1733 if (!pcbddc->coarse_phi_B) { 1734 PetscScalar *marray; 1735 1736 n = n_B*pcbddc->local_primal_size; 1737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1738 n += n_D*pcbddc->local_primal_size; 1739 } 1740 if (!pcbddc->symmetric_primal) { 1741 n *= 2; 1742 } 1743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1745 n = n_B*pcbddc->local_primal_size; 1746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1748 n += n_D*pcbddc->local_primal_size; 1749 } 1750 if (!pcbddc->symmetric_primal) { 1751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1753 n = n_B*pcbddc->local_primal_size; 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1755 } 1756 } else { 1757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1762 } 1763 } 1764 } 1765 1766 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1767 p0_lidx_I = NULL; 1768 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1769 const PetscInt *idxs; 1770 1771 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1772 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1773 for (i=0;i<pcbddc->benign_n;i++) { 1774 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1775 } 1776 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1777 } 1778 1779 /* vertices */ 1780 if (n_vertices) { 1781 1782 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1783 1784 if (n_R) { 1785 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1786 PetscBLASInt B_N,B_one = 1; 1787 PetscScalar *x,*y; 1788 PetscBool isseqaij; 1789 1790 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1791 if (need_benign_correction) { 1792 ISLocalToGlobalMapping RtoN; 1793 IS is_p0; 1794 PetscInt *idxs_p0,n; 1795 1796 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1797 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1798 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1799 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1800 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1801 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1802 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1803 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1804 } 1805 1806 if (lda_rhs == n_R) { 1807 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1808 } else { 1809 PetscScalar *av,*array; 1810 const PetscInt *xadj,*adjncy; 1811 PetscInt n; 1812 PetscBool flg_row; 1813 1814 array = work+lda_rhs*n_vertices; 1815 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1816 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1817 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1818 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1819 for (i=0;i<n;i++) { 1820 PetscInt j; 1821 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1822 } 1823 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1824 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1825 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1826 } 1827 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1828 if (need_benign_correction) { 1829 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1830 PetscScalar *marr; 1831 1832 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1833 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1834 1835 | 0 0 0 | (V) 1836 L = | 0 0 -1 | (P-p0) 1837 | 0 0 -1 | (p0) 1838 1839 */ 1840 for (i=0;i<reuse_solver->benign_n;i++) { 1841 const PetscScalar *vals; 1842 const PetscInt *idxs,*idxs_zero; 1843 PetscInt n,j,nz; 1844 1845 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1846 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1847 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1848 for (j=0;j<n;j++) { 1849 PetscScalar val = vals[j]; 1850 PetscInt k,col = idxs[j]; 1851 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1852 } 1853 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1854 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1855 } 1856 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1857 } 1858 if (F) { 1859 /* need to correct the rhs */ 1860 if (need_benign_correction) { 1861 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1862 PetscScalar *marr; 1863 1864 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1865 if (lda_rhs != n_R) { 1866 for (i=0;i<n_vertices;i++) { 1867 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1868 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1869 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1870 } 1871 } else { 1872 for (i=0;i<n_vertices;i++) { 1873 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1874 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1875 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1876 } 1877 } 1878 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1879 } 1880 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1881 /* need to correct the solution */ 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 PetscScalar *marr; 1885 1886 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1887 if (lda_rhs != n_R) { 1888 for (i=0;i<n_vertices;i++) { 1889 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1890 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1891 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1892 } 1893 } else { 1894 for (i=0;i<n_vertices;i++) { 1895 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 } 1899 } 1900 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1901 } 1902 } else { 1903 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1904 for (i=0;i<n_vertices;i++) { 1905 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1906 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1907 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1908 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1909 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1910 } 1911 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1912 } 1913 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1914 /* S_VV and S_CV */ 1915 if (n_constraints) { 1916 Mat B; 1917 1918 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1919 for (i=0;i<n_vertices;i++) { 1920 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1921 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1922 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1924 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1925 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1926 } 1927 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1928 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1929 ierr = MatDestroy(&B);CHKERRQ(ierr); 1930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1931 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1932 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1933 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1934 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1935 ierr = MatDestroy(&B);CHKERRQ(ierr); 1936 } 1937 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1938 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1939 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1940 } 1941 if (lda_rhs != n_R) { 1942 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1944 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1945 } 1946 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1947 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1948 if (need_benign_correction) { 1949 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1950 PetscScalar *marr,*sums; 1951 1952 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1953 ierr = MatDenseGetArray(S_VVt,&marr); 1954 for (i=0;i<reuse_solver->benign_n;i++) { 1955 const PetscScalar *vals; 1956 const PetscInt *idxs,*idxs_zero; 1957 PetscInt n,j,nz; 1958 1959 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1960 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1961 for (j=0;j<n_vertices;j++) { 1962 PetscInt k; 1963 sums[j] = 0.; 1964 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1965 } 1966 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1967 for (j=0;j<n;j++) { 1968 PetscScalar val = vals[j]; 1969 PetscInt k; 1970 for (k=0;k<n_vertices;k++) { 1971 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1972 } 1973 } 1974 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1975 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1976 } 1977 ierr = PetscFree(sums);CHKERRQ(ierr); 1978 ierr = MatDenseRestoreArray(S_VVt,&marr); 1979 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1980 } 1981 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1983 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1984 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1985 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1986 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1987 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1988 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1989 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1990 } else { 1991 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1992 } 1993 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1994 1995 /* coarse basis functions */ 1996 for (i=0;i<n_vertices;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 y[n_B*i+idx_V_B[i]] = 1.0; 2005 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2006 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2007 2008 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2009 PetscInt j; 2010 2011 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2016 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2017 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2018 } 2019 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2020 } 2021 /* if n_R == 0 the object is not destroyed */ 2022 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2023 } 2024 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2025 2026 if (n_constraints) { 2027 Mat B; 2028 2029 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2030 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2031 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2032 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2033 if (n_vertices) { 2034 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2035 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2036 } else { 2037 Mat S_VCt; 2038 2039 if (lda_rhs != n_R) { 2040 ierr = MatDestroy(&B);CHKERRQ(ierr); 2041 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2042 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2043 } 2044 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2045 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2046 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2047 } 2048 } 2049 ierr = MatDestroy(&B);CHKERRQ(ierr); 2050 /* coarse basis functions */ 2051 for (i=0;i<n_constraints;i++) { 2052 PetscScalar *y; 2053 2054 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2055 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2056 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2057 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2058 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2059 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2060 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2061 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2062 PetscInt j; 2063 2064 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2065 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2066 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2067 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2068 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2069 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2070 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2071 } 2072 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2073 } 2074 } 2075 if (n_constraints) { 2076 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2079 2080 /* coarse matrix entries relative to B_0 */ 2081 if (pcbddc->benign_n) { 2082 Mat B0_B,B0_BPHI; 2083 IS is_dummy; 2084 PetscScalar *data; 2085 PetscInt j; 2086 2087 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2088 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2089 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2090 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2091 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2092 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2093 for (j=0;j<pcbddc->benign_n;j++) { 2094 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2095 for (i=0;i<pcbddc->local_primal_size;i++) { 2096 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2097 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2098 } 2099 } 2100 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2101 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2102 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2103 } 2104 2105 /* compute other basis functions for non-symmetric problems */ 2106 if (!pcbddc->symmetric_primal) { 2107 Mat B_V=NULL,B_C=NULL; 2108 PetscScalar *marray; 2109 2110 if (n_constraints) { 2111 Mat S_CCT,C_CRT; 2112 2113 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2114 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2115 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2116 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2117 if (n_vertices) { 2118 Mat S_VCT; 2119 2120 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2121 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2122 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2123 } 2124 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2125 } 2126 if (n_vertices && n_R) { 2127 PetscScalar *av,*marray; 2128 const PetscInt *xadj,*adjncy; 2129 PetscInt n; 2130 PetscBool flg_row; 2131 2132 /* B_V = B_V - A_VR^T */ 2133 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2134 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2135 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2136 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2137 for (i=0;i<n;i++) { 2138 PetscInt j; 2139 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2140 } 2141 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2142 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2143 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2144 } 2145 2146 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2147 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2148 for (i=0;i<n_vertices;i++) { 2149 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2150 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2151 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2152 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2153 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2154 } 2155 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2156 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2157 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2158 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2159 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2160 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2161 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2162 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2163 } 2164 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2165 /* coarse basis functions */ 2166 for (i=0;i<pcbddc->local_primal_size;i++) { 2167 PetscScalar *y; 2168 2169 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2170 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2171 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2172 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2173 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2174 if (i<n_vertices) { 2175 y[n_B*i+idx_V_B[i]] = 1.0; 2176 } 2177 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2179 2180 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2181 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2182 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2183 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2185 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2186 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2187 } 2188 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2189 } 2190 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2191 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2192 } 2193 /* free memory */ 2194 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2195 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2196 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2197 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2198 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2199 ierr = PetscFree(work);CHKERRQ(ierr); 2200 if (n_vertices) { 2201 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2202 } 2203 if (n_constraints) { 2204 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2205 } 2206 /* Checking coarse_sub_mat and coarse basis functios */ 2207 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2208 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2209 if (pcbddc->dbg_flag) { 2210 Mat coarse_sub_mat; 2211 Mat AUXMAT,TM1,TM2,TM3,TM4; 2212 Mat coarse_phi_D,coarse_phi_B; 2213 Mat coarse_psi_D,coarse_psi_B; 2214 Mat A_II,A_BB,A_IB,A_BI; 2215 Mat C_B,CPHI; 2216 IS is_dummy; 2217 Vec mones; 2218 MatType checkmattype=MATSEQAIJ; 2219 PetscReal real_value; 2220 2221 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2222 Mat A; 2223 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2224 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2225 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2226 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2227 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2228 ierr = MatDestroy(&A);CHKERRQ(ierr); 2229 } else { 2230 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2231 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2232 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2233 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2234 } 2235 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2236 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2237 if (!pcbddc->symmetric_primal) { 2238 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2239 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2240 } 2241 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2242 2243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2246 if (!pcbddc->symmetric_primal) { 2247 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2248 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2249 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2250 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2251 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2252 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2253 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2254 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2255 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2256 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2257 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2258 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2259 } else { 2260 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2261 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2262 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2263 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2264 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2265 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2266 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2267 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2268 } 2269 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2270 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2271 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2272 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2273 if (pcbddc->benign_n) { 2274 Mat B0_B,B0_BPHI; 2275 PetscScalar *data,*data2; 2276 PetscInt j; 2277 2278 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2279 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2280 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2281 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2282 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2283 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2284 for (j=0;j<pcbddc->benign_n;j++) { 2285 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2286 for (i=0;i<pcbddc->local_primal_size;i++) { 2287 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2288 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2289 } 2290 } 2291 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2292 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2293 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2294 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2295 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2296 } 2297 #if 0 2298 { 2299 PetscViewer viewer; 2300 char filename[256]; 2301 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2302 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2303 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2304 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2305 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2306 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2307 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2308 if (save_change) { 2309 Mat phi_B; 2310 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2311 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2312 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2313 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2314 } else { 2315 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2316 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2317 } 2318 if (pcbddc->coarse_phi_D) { 2319 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2320 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2321 } 2322 if (pcbddc->coarse_psi_B) { 2323 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2324 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2325 } 2326 if (pcbddc->coarse_psi_D) { 2327 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2328 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2329 } 2330 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2331 } 2332 #endif 2333 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2334 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2337 2338 /* check constraints */ 2339 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2340 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2341 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2342 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2343 } else { 2344 PetscScalar *data; 2345 Mat tmat; 2346 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2348 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2349 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2350 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2351 } 2352 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2353 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2354 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2355 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2356 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2357 if (!pcbddc->symmetric_primal) { 2358 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2359 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2360 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2361 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2363 } 2364 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2365 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2366 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2367 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2368 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2369 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2370 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2371 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2372 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2373 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2374 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2375 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2376 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2377 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2378 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2379 if (!pcbddc->symmetric_primal) { 2380 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2381 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2382 } 2383 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2384 } 2385 /* get back data */ 2386 *coarse_submat_vals_n = coarse_submat_vals; 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2392 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2393 { 2394 Mat *work_mat; 2395 IS isrow_s,iscol_s; 2396 PetscBool rsorted,csorted; 2397 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2402 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2403 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2404 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2405 2406 if (!rsorted) { 2407 const PetscInt *idxs; 2408 PetscInt *idxs_sorted,i; 2409 2410 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2411 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2412 for (i=0;i<rsize;i++) { 2413 idxs_perm_r[i] = i; 2414 } 2415 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2416 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2417 for (i=0;i<rsize;i++) { 2418 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2419 } 2420 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2421 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2422 } else { 2423 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2424 isrow_s = isrow; 2425 } 2426 2427 if (!csorted) { 2428 if (isrow == iscol) { 2429 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2430 iscol_s = isrow_s; 2431 } else { 2432 const PetscInt *idxs; 2433 PetscInt *idxs_sorted,i; 2434 2435 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2436 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2437 for (i=0;i<csize;i++) { 2438 idxs_perm_c[i] = i; 2439 } 2440 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2441 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2442 for (i=0;i<csize;i++) { 2443 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2444 } 2445 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2447 } 2448 } else { 2449 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2450 iscol_s = iscol; 2451 } 2452 2453 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2454 2455 if (!rsorted || !csorted) { 2456 Mat new_mat; 2457 IS is_perm_r,is_perm_c; 2458 2459 if (!rsorted) { 2460 PetscInt *idxs_r,i; 2461 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2462 for (i=0;i<rsize;i++) { 2463 idxs_r[idxs_perm_r[i]] = i; 2464 } 2465 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2466 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2467 } else { 2468 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2469 } 2470 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2471 2472 if (!csorted) { 2473 if (isrow_s == iscol_s) { 2474 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2475 is_perm_c = is_perm_r; 2476 } else { 2477 PetscInt *idxs_c,i; 2478 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2479 for (i=0;i<csize;i++) { 2480 idxs_c[idxs_perm_c[i]] = i; 2481 } 2482 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2483 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2484 } 2485 } else { 2486 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2487 } 2488 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2489 2490 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2491 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2492 work_mat[0] = new_mat; 2493 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2494 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2495 } 2496 2497 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2498 *B = work_mat[0]; 2499 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2500 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2501 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 #undef __FUNCT__ 2506 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2507 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2508 { 2509 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2510 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2511 Mat new_mat; 2512 IS is_local,is_global; 2513 PetscInt local_size; 2514 PetscBool isseqaij; 2515 PetscErrorCode ierr; 2516 2517 PetscFunctionBegin; 2518 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2519 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2520 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2521 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2522 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2523 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2524 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2525 2526 /* check */ 2527 if (pcbddc->dbg_flag) { 2528 Vec x,x_change; 2529 PetscReal error; 2530 2531 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2532 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2533 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2534 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2535 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2536 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2537 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2538 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2539 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2540 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2543 ierr = VecDestroy(&x);CHKERRQ(ierr); 2544 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2545 } 2546 2547 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2548 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2549 if (isseqaij) { 2550 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2551 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2552 } else { 2553 Mat work_mat; 2554 2555 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2556 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2557 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2558 } 2559 if (matis->A->symmetric_set) { 2560 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2561 #if !defined(PETSC_USE_COMPLEX) 2562 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2563 #endif 2564 } 2565 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 #undef __FUNCT__ 2570 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2571 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2572 { 2573 PC_IS* pcis = (PC_IS*)(pc->data); 2574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2576 PetscInt *idx_R_local=NULL; 2577 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2578 PetscInt vbs,bs; 2579 PetscBT bitmask=NULL; 2580 PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 /* 2584 No need to setup local scatters if 2585 - primal space is unchanged 2586 AND 2587 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2588 AND 2589 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2590 */ 2591 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2592 PetscFunctionReturn(0); 2593 } 2594 /* destroy old objects */ 2595 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2596 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2597 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2598 /* Set Non-overlapping dimensions */ 2599 n_B = pcis->n_B; 2600 n_D = pcis->n - n_B; 2601 n_vertices = pcbddc->n_vertices; 2602 2603 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2604 2605 /* create auxiliary bitmask and allocate workspace */ 2606 if (!sub_schurs->reuse_solver) { 2607 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2608 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2609 for (i=0;i<n_vertices;i++) { 2610 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2611 } 2612 2613 for (i=0, n_R=0; i<pcis->n; i++) { 2614 if (!PetscBTLookup(bitmask,i)) { 2615 idx_R_local[n_R++] = i; 2616 } 2617 } 2618 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2619 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2620 2621 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2622 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2623 } 2624 2625 /* Block code */ 2626 vbs = 1; 2627 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2628 if (bs>1 && !(n_vertices%bs)) { 2629 PetscBool is_blocked = PETSC_TRUE; 2630 PetscInt *vary; 2631 if (!sub_schurs->reuse_solver) { 2632 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2633 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2634 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2635 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2636 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2637 for (i=0; i<pcis->n/bs; i++) { 2638 if (vary[i]!=0 && vary[i]!=bs) { 2639 is_blocked = PETSC_FALSE; 2640 break; 2641 } 2642 } 2643 ierr = PetscFree(vary);CHKERRQ(ierr); 2644 } else { 2645 /* Verify directly the R set */ 2646 for (i=0; i<n_R/bs; i++) { 2647 PetscInt j,node=idx_R_local[bs*i]; 2648 for (j=1; j<bs; j++) { 2649 if (node != idx_R_local[bs*i+j]-j) { 2650 is_blocked = PETSC_FALSE; 2651 break; 2652 } 2653 } 2654 } 2655 } 2656 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2657 vbs = bs; 2658 for (i=0;i<n_R/vbs;i++) { 2659 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2660 } 2661 } 2662 } 2663 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2664 if (sub_schurs->reuse_solver) { 2665 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2666 2667 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2668 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2669 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2670 reuse_solver->is_R = pcbddc->is_R_local; 2671 } else { 2672 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2673 } 2674 2675 /* print some info if requested */ 2676 if (pcbddc->dbg_flag) { 2677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2679 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2683 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2684 } 2685 2686 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2687 if (!sub_schurs->reuse_solver) { 2688 IS is_aux1,is_aux2; 2689 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2690 2691 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2692 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2693 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2694 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2695 for (i=0; i<n_D; i++) { 2696 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2697 } 2698 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2699 for (i=0, j=0; i<n_R; i++) { 2700 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2701 aux_array1[j++] = i; 2702 } 2703 } 2704 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2705 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2706 for (i=0, j=0; i<n_B; i++) { 2707 if (!PetscBTLookup(bitmask,is_indices[i])) { 2708 aux_array2[j++] = i; 2709 } 2710 } 2711 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2712 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2713 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2714 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2715 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2716 2717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2718 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2719 for (i=0, j=0; i<n_R; i++) { 2720 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2721 aux_array1[j++] = i; 2722 } 2723 } 2724 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2725 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2726 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2727 } 2728 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2729 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2730 } else { 2731 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2732 IS tis; 2733 PetscInt schur_size; 2734 2735 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2736 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2737 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2738 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2739 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2740 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2741 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2742 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2743 } 2744 } 2745 PetscFunctionReturn(0); 2746 } 2747 2748 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2751 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2752 { 2753 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2754 PC_IS *pcis = (PC_IS*)pc->data; 2755 PC pc_temp; 2756 Mat A_RR; 2757 MatReuse reuse; 2758 PetscScalar m_one = -1.0; 2759 PetscReal value; 2760 PetscInt n_D,n_R; 2761 PetscBool use_exact,use_exact_reduced,issbaij; 2762 PetscErrorCode ierr; 2763 /* prefixes stuff */ 2764 char dir_prefix[256],neu_prefix[256],str_level[16]; 2765 size_t len; 2766 2767 PetscFunctionBegin; 2768 2769 /* compute prefixes */ 2770 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2771 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2772 if (!pcbddc->current_level) { 2773 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2774 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2775 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2776 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2777 } else { 2778 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2779 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2780 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2781 len -= 15; /* remove "pc_bddc_coarse_" */ 2782 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2783 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2784 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2785 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2786 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2787 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2788 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2789 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2790 } 2791 2792 /* DIRICHLET PROBLEM */ 2793 if (dirichlet) { 2794 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2795 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2796 if (!sub_schurs->reuse_solver) { 2797 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2798 } 2799 if (pcbddc->dbg_flag) { 2800 Mat A_IIn; 2801 2802 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2803 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2804 pcis->A_II = A_IIn; 2805 } 2806 } 2807 if (pcbddc->local_mat->symmetric_set) { 2808 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2809 } 2810 /* Matrix for Dirichlet problem is pcis->A_II */ 2811 n_D = pcis->n - pcis->n_B; 2812 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2813 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2814 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2815 /* default */ 2816 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2817 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2818 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2819 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2820 if (issbaij) { 2821 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2822 } else { 2823 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2824 } 2825 /* Allow user's customization */ 2826 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2827 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2828 } 2829 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2830 if (sub_schurs->reuse_solver) { 2831 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2832 2833 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2834 } 2835 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2836 if (!n_D) { 2837 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2838 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2839 } 2840 /* Set Up KSP for Dirichlet problem of BDDC */ 2841 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2842 /* set ksp_D into pcis data */ 2843 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2844 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2845 pcis->ksp_D = pcbddc->ksp_D; 2846 } 2847 2848 /* NEUMANN PROBLEM */ 2849 A_RR = 0; 2850 if (neumann) { 2851 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2852 PetscInt ibs,mbs; 2853 PetscBool issbaij; 2854 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2855 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2856 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2857 if (pcbddc->ksp_R) { /* already created ksp */ 2858 PetscInt nn_R; 2859 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2860 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2861 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2862 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2863 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2864 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2865 reuse = MAT_INITIAL_MATRIX; 2866 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2867 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2868 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2869 reuse = MAT_INITIAL_MATRIX; 2870 } else { /* safe to reuse the matrix */ 2871 reuse = MAT_REUSE_MATRIX; 2872 } 2873 } 2874 /* last check */ 2875 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2876 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2877 reuse = MAT_INITIAL_MATRIX; 2878 } 2879 } else { /* first time, so we need to create the matrix */ 2880 reuse = MAT_INITIAL_MATRIX; 2881 } 2882 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2883 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2884 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2885 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2886 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2887 if (matis->A == pcbddc->local_mat) { 2888 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2889 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2890 } else { 2891 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2892 } 2893 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2894 if (matis->A == pcbddc->local_mat) { 2895 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2896 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2897 } else { 2898 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2899 } 2900 } 2901 /* extract A_RR */ 2902 if (sub_schurs->reuse_solver) { 2903 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2904 2905 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2906 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2907 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2908 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2909 } else { 2910 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2911 } 2912 } else { 2913 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2914 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2915 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2916 } 2917 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2918 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2919 } 2920 if (pcbddc->local_mat->symmetric_set) { 2921 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2922 } 2923 if (!pcbddc->ksp_R) { /* create object if not present */ 2924 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2925 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2926 /* default */ 2927 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2928 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2929 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2930 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2931 if (issbaij) { 2932 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2933 } else { 2934 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2935 } 2936 /* Allow user's customization */ 2937 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2938 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2939 } 2940 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2941 if (!n_R) { 2942 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2943 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2944 } 2945 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2946 /* Reuse solver if it is present */ 2947 if (sub_schurs->reuse_solver) { 2948 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2949 2950 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2951 } 2952 /* Set Up KSP for Neumann problem of BDDC */ 2953 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2954 } 2955 2956 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2957 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2958 if (pcbddc->dbg_flag) { 2959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2960 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2961 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2962 } 2963 if (dirichlet) { /* Dirichlet */ 2964 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2965 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2966 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2967 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2968 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2969 /* need to be adapted? */ 2970 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2971 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2972 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2973 /* print info */ 2974 if (pcbddc->dbg_flag) { 2975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2977 } 2978 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2979 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2980 } 2981 } 2982 if (neumann) { /* Neumann */ 2983 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2984 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2985 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2986 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2987 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2988 /* need to be adapted? */ 2989 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2990 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2991 /* print info */ 2992 if (pcbddc->dbg_flag) { 2993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2995 } 2996 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2997 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2998 } 2999 } 3000 } 3001 /* free Neumann problem's matrix */ 3002 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3008 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3009 { 3010 PetscErrorCode ierr; 3011 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3012 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3013 3014 PetscFunctionBegin; 3015 if (!sub_schurs->reuse_solver) { 3016 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3017 } 3018 if (!pcbddc->switch_static) { 3019 if (applytranspose && pcbddc->local_auxmat1) { 3020 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3021 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3022 } 3023 if (!sub_schurs->reuse_solver) { 3024 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3025 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3026 } else { 3027 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3028 3029 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3030 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3031 } 3032 } else { 3033 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3034 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3035 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3036 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3037 if (applytranspose && pcbddc->local_auxmat1) { 3038 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3039 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3040 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3041 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 } 3043 } 3044 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3045 if (applytranspose) { 3046 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3047 } else { 3048 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3049 } 3050 } else { 3051 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3052 3053 if (applytranspose) { 3054 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3055 } else { 3056 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3057 } 3058 } 3059 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3060 if (!pcbddc->switch_static) { 3061 if (!sub_schurs->reuse_solver) { 3062 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 } else { 3065 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3066 3067 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3068 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 } 3070 if (!applytranspose && pcbddc->local_auxmat1) { 3071 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3072 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3073 } 3074 } else { 3075 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3076 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3077 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3078 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3079 if (!applytranspose && pcbddc->local_auxmat1) { 3080 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3081 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3082 } 3083 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3087 } 3088 PetscFunctionReturn(0); 3089 } 3090 3091 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3094 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3095 { 3096 PetscErrorCode ierr; 3097 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3098 PC_IS* pcis = (PC_IS*) (pc->data); 3099 const PetscScalar zero = 0.0; 3100 3101 PetscFunctionBegin; 3102 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3103 if (!pcbddc->benign_apply_coarse_only) { 3104 if (applytranspose) { 3105 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3106 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3107 } else { 3108 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3109 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3110 } 3111 } else { 3112 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3113 } 3114 3115 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3116 if (pcbddc->benign_n) { 3117 PetscScalar *array; 3118 PetscInt j; 3119 3120 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3121 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3122 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3123 } 3124 3125 /* start communications from local primal nodes to rhs of coarse solver */ 3126 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3127 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3129 3130 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3131 /* TODO remove null space when doing multilevel */ 3132 if (pcbddc->coarse_ksp) { 3133 Mat coarse_mat; 3134 Vec rhs,sol; 3135 MatNullSpace nullsp; 3136 3137 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3138 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3139 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3140 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3141 if (nullsp) { 3142 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3143 } 3144 if (applytranspose) { 3145 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3146 } else { 3147 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3148 } 3149 if (nullsp) { 3150 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3151 } 3152 } 3153 3154 /* Local solution on R nodes */ 3155 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3156 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3157 } 3158 /* communications from coarse sol to local primal nodes */ 3159 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3160 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3161 3162 /* Sum contributions from the two levels */ 3163 if (!pcbddc->benign_apply_coarse_only) { 3164 if (applytranspose) { 3165 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3166 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3167 } else { 3168 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3169 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3170 } 3171 /* store p0 */ 3172 if (pcbddc->benign_n) { 3173 PetscScalar *array; 3174 PetscInt j; 3175 3176 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3177 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3178 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3179 } 3180 } else { /* expand the coarse solution */ 3181 if (applytranspose) { 3182 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3183 } else { 3184 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3185 } 3186 } 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3192 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3193 { 3194 PetscErrorCode ierr; 3195 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3196 PetscScalar *array; 3197 Vec from,to; 3198 3199 PetscFunctionBegin; 3200 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3201 from = pcbddc->coarse_vec; 3202 to = pcbddc->vec1_P; 3203 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3204 Vec tvec; 3205 3206 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3207 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3208 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3209 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3210 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3211 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3212 } 3213 } else { /* from local to global -> put data in coarse right hand side */ 3214 from = pcbddc->vec1_P; 3215 to = pcbddc->coarse_vec; 3216 } 3217 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3223 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3224 { 3225 PetscErrorCode ierr; 3226 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3227 PetscScalar *array; 3228 Vec from,to; 3229 3230 PetscFunctionBegin; 3231 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3232 from = pcbddc->coarse_vec; 3233 to = pcbddc->vec1_P; 3234 } else { /* from local to global -> put data in coarse right hand side */ 3235 from = pcbddc->vec1_P; 3236 to = pcbddc->coarse_vec; 3237 } 3238 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3239 if (smode == SCATTER_FORWARD) { 3240 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3241 Vec tvec; 3242 3243 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3244 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3245 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3246 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3247 } 3248 } else { 3249 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3250 ierr = VecResetArray(from);CHKERRQ(ierr); 3251 } 3252 } 3253 PetscFunctionReturn(0); 3254 } 3255 3256 /* uncomment for testing purposes */ 3257 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3260 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3261 { 3262 PetscErrorCode ierr; 3263 PC_IS* pcis = (PC_IS*)(pc->data); 3264 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3265 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3266 /* one and zero */ 3267 PetscScalar one=1.0,zero=0.0; 3268 /* space to store constraints and their local indices */ 3269 PetscScalar *constraints_data; 3270 PetscInt *constraints_idxs,*constraints_idxs_B; 3271 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3272 PetscInt *constraints_n; 3273 /* iterators */ 3274 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3275 /* BLAS integers */ 3276 PetscBLASInt lwork,lierr; 3277 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3278 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3279 /* reuse */ 3280 PetscInt olocal_primal_size,olocal_primal_size_cc; 3281 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3282 /* change of basis */ 3283 PetscBool qr_needed; 3284 PetscBT change_basis,qr_needed_idx; 3285 /* auxiliary stuff */ 3286 PetscInt *nnz,*is_indices; 3287 PetscInt ncc; 3288 /* some quantities */ 3289 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3290 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3291 3292 PetscFunctionBegin; 3293 /* Destroy Mat objects computed previously */ 3294 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3295 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3296 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3297 /* save info on constraints from previous setup (if any) */ 3298 olocal_primal_size = pcbddc->local_primal_size; 3299 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3300 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3301 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3302 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3303 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3304 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3305 3306 /* print some info */ 3307 if (pcbddc->dbg_flag) { 3308 IS vertices; 3309 PetscInt nv,nedges,nfaces; 3310 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3311 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3312 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3313 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3319 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3320 } 3321 3322 if (!pcbddc->adaptive_selection) { 3323 IS ISForVertices,*ISForFaces,*ISForEdges; 3324 MatNullSpace nearnullsp; 3325 const Vec *nearnullvecs; 3326 Vec *localnearnullsp; 3327 PetscScalar *array; 3328 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3329 PetscBool nnsp_has_cnst; 3330 /* LAPACK working arrays for SVD or POD */ 3331 PetscBool skip_lapack,boolforchange; 3332 PetscScalar *work; 3333 PetscReal *singular_vals; 3334 #if defined(PETSC_USE_COMPLEX) 3335 PetscReal *rwork; 3336 #endif 3337 #if defined(PETSC_MISSING_LAPACK_GESVD) 3338 PetscScalar *temp_basis,*correlation_mat; 3339 #else 3340 PetscBLASInt dummy_int=1; 3341 PetscScalar dummy_scalar=1.; 3342 #endif 3343 3344 /* Get index sets for faces, edges and vertices from graph */ 3345 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3346 /* free unneeded index sets */ 3347 if (!pcbddc->use_vertices) { 3348 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3349 } 3350 if (!pcbddc->use_edges) { 3351 for (i=0;i<n_ISForEdges;i++) { 3352 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3353 } 3354 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3355 n_ISForEdges = 0; 3356 } 3357 if (!pcbddc->use_faces) { 3358 for (i=0;i<n_ISForFaces;i++) { 3359 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3360 } 3361 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3362 n_ISForFaces = 0; 3363 } 3364 3365 #if defined(PETSC_USE_DEBUG) 3366 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3367 Also use_change_of_basis should be consistent among processors */ 3368 if (pcbddc->NullSpace) { 3369 PetscBool tbool[2],gbool[2]; 3370 3371 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3372 pcbddc->use_change_of_basis = PETSC_TRUE; 3373 if (!ISForEdges) { 3374 pcbddc->use_change_on_faces = PETSC_TRUE; 3375 } 3376 } 3377 tbool[0] = pcbddc->use_change_of_basis; 3378 tbool[1] = pcbddc->use_change_on_faces; 3379 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3380 pcbddc->use_change_of_basis = gbool[0]; 3381 pcbddc->use_change_on_faces = gbool[1]; 3382 } 3383 #endif 3384 3385 /* check if near null space is attached to global mat */ 3386 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3387 if (nearnullsp) { 3388 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3389 /* remove any stored info */ 3390 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3391 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3392 /* store information for BDDC solver reuse */ 3393 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3394 pcbddc->onearnullspace = nearnullsp; 3395 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3396 for (i=0;i<nnsp_size;i++) { 3397 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3398 } 3399 } else { /* if near null space is not provided BDDC uses constants by default */ 3400 nnsp_size = 0; 3401 nnsp_has_cnst = PETSC_TRUE; 3402 } 3403 /* get max number of constraints on a single cc */ 3404 max_constraints = nnsp_size; 3405 if (nnsp_has_cnst) max_constraints++; 3406 3407 /* 3408 Evaluate maximum storage size needed by the procedure 3409 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3410 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3411 There can be multiple constraints per connected component 3412 */ 3413 n_vertices = 0; 3414 if (ISForVertices) { 3415 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3416 } 3417 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3418 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3419 3420 total_counts = n_ISForFaces+n_ISForEdges; 3421 total_counts *= max_constraints; 3422 total_counts += n_vertices; 3423 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3424 3425 total_counts = 0; 3426 max_size_of_constraint = 0; 3427 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3428 IS used_is; 3429 if (i<n_ISForEdges) { 3430 used_is = ISForEdges[i]; 3431 } else { 3432 used_is = ISForFaces[i-n_ISForEdges]; 3433 } 3434 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3435 total_counts += j; 3436 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3437 } 3438 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3439 3440 /* get local part of global near null space vectors */ 3441 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3442 for (k=0;k<nnsp_size;k++) { 3443 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3444 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3445 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3446 } 3447 3448 /* whether or not to skip lapack calls */ 3449 skip_lapack = PETSC_TRUE; 3450 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3451 3452 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3453 if (!skip_lapack) { 3454 PetscScalar temp_work; 3455 3456 #if defined(PETSC_MISSING_LAPACK_GESVD) 3457 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3458 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3459 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3460 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3461 #if defined(PETSC_USE_COMPLEX) 3462 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3463 #endif 3464 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3465 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3466 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3467 lwork = -1; 3468 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3469 #if !defined(PETSC_USE_COMPLEX) 3470 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3471 #else 3472 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3473 #endif 3474 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3475 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3476 #else /* on missing GESVD */ 3477 /* SVD */ 3478 PetscInt max_n,min_n; 3479 max_n = max_size_of_constraint; 3480 min_n = max_constraints; 3481 if (max_size_of_constraint < max_constraints) { 3482 min_n = max_size_of_constraint; 3483 max_n = max_constraints; 3484 } 3485 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3486 #if defined(PETSC_USE_COMPLEX) 3487 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3488 #endif 3489 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3490 lwork = -1; 3491 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3492 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3493 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3494 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3495 #if !defined(PETSC_USE_COMPLEX) 3496 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3497 #else 3498 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3499 #endif 3500 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3501 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3502 #endif /* on missing GESVD */ 3503 /* Allocate optimal workspace */ 3504 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3505 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3506 } 3507 /* Now we can loop on constraining sets */ 3508 total_counts = 0; 3509 constraints_idxs_ptr[0] = 0; 3510 constraints_data_ptr[0] = 0; 3511 /* vertices */ 3512 if (n_vertices) { 3513 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3514 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3515 for (i=0;i<n_vertices;i++) { 3516 constraints_n[total_counts] = 1; 3517 constraints_data[total_counts] = 1.0; 3518 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3519 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3520 total_counts++; 3521 } 3522 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3523 n_vertices = total_counts; 3524 } 3525 3526 /* edges and faces */ 3527 total_counts_cc = total_counts; 3528 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3529 IS used_is; 3530 PetscBool idxs_copied = PETSC_FALSE; 3531 3532 if (ncc<n_ISForEdges) { 3533 used_is = ISForEdges[ncc]; 3534 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3535 } else { 3536 used_is = ISForFaces[ncc-n_ISForEdges]; 3537 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3538 } 3539 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3540 3541 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3542 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3543 /* change of basis should not be performed on local periodic nodes */ 3544 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3545 if (nnsp_has_cnst) { 3546 PetscScalar quad_value; 3547 3548 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3549 idxs_copied = PETSC_TRUE; 3550 3551 if (!pcbddc->use_nnsp_true) { 3552 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3553 } else { 3554 quad_value = 1.0; 3555 } 3556 for (j=0;j<size_of_constraint;j++) { 3557 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3558 } 3559 temp_constraints++; 3560 total_counts++; 3561 } 3562 for (k=0;k<nnsp_size;k++) { 3563 PetscReal real_value; 3564 PetscScalar *ptr_to_data; 3565 3566 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3567 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3568 for (j=0;j<size_of_constraint;j++) { 3569 ptr_to_data[j] = array[is_indices[j]]; 3570 } 3571 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3572 /* check if array is null on the connected component */ 3573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3574 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3575 if (real_value > 0.0) { /* keep indices and values */ 3576 temp_constraints++; 3577 total_counts++; 3578 if (!idxs_copied) { 3579 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3580 idxs_copied = PETSC_TRUE; 3581 } 3582 } 3583 } 3584 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3585 valid_constraints = temp_constraints; 3586 if (!pcbddc->use_nnsp_true && temp_constraints) { 3587 if (temp_constraints == 1) { /* just normalize the constraint */ 3588 PetscScalar norm,*ptr_to_data; 3589 3590 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3592 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3593 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3594 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3595 } else { /* perform SVD */ 3596 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3597 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3598 3599 #if defined(PETSC_MISSING_LAPACK_GESVD) 3600 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3601 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3602 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3603 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3604 from that computed using LAPACKgesvd 3605 -> This is due to a different computation of eigenvectors in LAPACKheev 3606 -> The quality of the POD-computed basis will be the same */ 3607 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3608 /* Store upper triangular part of correlation matrix */ 3609 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3610 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3611 for (j=0;j<temp_constraints;j++) { 3612 for (k=0;k<j+1;k++) { 3613 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3614 } 3615 } 3616 /* compute eigenvalues and eigenvectors of correlation matrix */ 3617 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3618 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3619 #if !defined(PETSC_USE_COMPLEX) 3620 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3621 #else 3622 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3623 #endif 3624 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3625 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3626 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3627 j = 0; 3628 while (j < temp_constraints && singular_vals[j] < tol) j++; 3629 total_counts = total_counts-j; 3630 valid_constraints = temp_constraints-j; 3631 /* scale and copy POD basis into used quadrature memory */ 3632 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3633 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3634 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3636 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3638 if (j<temp_constraints) { 3639 PetscInt ii; 3640 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3641 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3642 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3643 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3644 for (k=0;k<temp_constraints-j;k++) { 3645 for (ii=0;ii<size_of_constraint;ii++) { 3646 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3647 } 3648 } 3649 } 3650 #else /* on missing GESVD */ 3651 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3652 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3653 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3654 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3655 #if !defined(PETSC_USE_COMPLEX) 3656 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3657 #else 3658 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3659 #endif 3660 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3661 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3662 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3663 k = temp_constraints; 3664 if (k > size_of_constraint) k = size_of_constraint; 3665 j = 0; 3666 while (j < k && singular_vals[k-j-1] < tol) j++; 3667 valid_constraints = k-j; 3668 total_counts = total_counts-temp_constraints+valid_constraints; 3669 #endif /* on missing GESVD */ 3670 } 3671 } 3672 /* update pointers information */ 3673 if (valid_constraints) { 3674 constraints_n[total_counts_cc] = valid_constraints; 3675 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3676 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3677 /* set change_of_basis flag */ 3678 if (boolforchange) { 3679 PetscBTSet(change_basis,total_counts_cc); 3680 } 3681 total_counts_cc++; 3682 } 3683 } 3684 /* free workspace */ 3685 if (!skip_lapack) { 3686 ierr = PetscFree(work);CHKERRQ(ierr); 3687 #if defined(PETSC_USE_COMPLEX) 3688 ierr = PetscFree(rwork);CHKERRQ(ierr); 3689 #endif 3690 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3691 #if defined(PETSC_MISSING_LAPACK_GESVD) 3692 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3693 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3694 #endif 3695 } 3696 for (k=0;k<nnsp_size;k++) { 3697 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3698 } 3699 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3700 /* free index sets of faces, edges and vertices */ 3701 for (i=0;i<n_ISForFaces;i++) { 3702 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3703 } 3704 if (n_ISForFaces) { 3705 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3706 } 3707 for (i=0;i<n_ISForEdges;i++) { 3708 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3709 } 3710 if (n_ISForEdges) { 3711 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3712 } 3713 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3714 } else { 3715 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3716 3717 total_counts = 0; 3718 n_vertices = 0; 3719 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3720 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3721 } 3722 max_constraints = 0; 3723 total_counts_cc = 0; 3724 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3725 total_counts += pcbddc->adaptive_constraints_n[i]; 3726 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3727 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3728 } 3729 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3730 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3731 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3732 constraints_data = pcbddc->adaptive_constraints_data; 3733 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3734 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3735 total_counts_cc = 0; 3736 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3737 if (pcbddc->adaptive_constraints_n[i]) { 3738 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3739 } 3740 } 3741 #if 0 3742 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3743 for (i=0;i<total_counts_cc;i++) { 3744 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3745 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3746 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3747 printf(" %d",constraints_idxs[j]); 3748 } 3749 printf("\n"); 3750 printf("number of cc: %d\n",constraints_n[i]); 3751 } 3752 for (i=0;i<n_vertices;i++) { 3753 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3754 } 3755 for (i=0;i<sub_schurs->n_subs;i++) { 3756 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3757 } 3758 #endif 3759 3760 max_size_of_constraint = 0; 3761 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3762 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3763 /* Change of basis */ 3764 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3765 if (pcbddc->use_change_of_basis) { 3766 for (i=0;i<sub_schurs->n_subs;i++) { 3767 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3768 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3769 } 3770 } 3771 } 3772 } 3773 pcbddc->local_primal_size = total_counts; 3774 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3775 3776 /* map constraints_idxs in boundary numbering */ 3777 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3778 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3779 3780 /* Create constraint matrix */ 3781 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3782 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3783 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3784 3785 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3786 /* determine if a QR strategy is needed for change of basis */ 3787 qr_needed = PETSC_FALSE; 3788 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3789 total_primal_vertices=0; 3790 pcbddc->local_primal_size_cc = 0; 3791 for (i=0;i<total_counts_cc;i++) { 3792 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3793 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3794 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3795 pcbddc->local_primal_size_cc += 1; 3796 } else if (PetscBTLookup(change_basis,i)) { 3797 for (k=0;k<constraints_n[i];k++) { 3798 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3799 } 3800 pcbddc->local_primal_size_cc += constraints_n[i]; 3801 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3802 PetscBTSet(qr_needed_idx,i); 3803 qr_needed = PETSC_TRUE; 3804 } 3805 } else { 3806 pcbddc->local_primal_size_cc += 1; 3807 } 3808 } 3809 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3810 pcbddc->n_vertices = total_primal_vertices; 3811 /* permute indices in order to have a sorted set of vertices */ 3812 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3813 3814 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3815 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3816 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3817 3818 /* nonzero structure of constraint matrix */ 3819 /* and get reference dof for local constraints */ 3820 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3821 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3822 3823 j = total_primal_vertices; 3824 total_counts = total_primal_vertices; 3825 cum = total_primal_vertices; 3826 for (i=n_vertices;i<total_counts_cc;i++) { 3827 if (!PetscBTLookup(change_basis,i)) { 3828 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3829 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3830 cum++; 3831 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3832 for (k=0;k<constraints_n[i];k++) { 3833 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3834 nnz[j+k] = size_of_constraint; 3835 } 3836 j += constraints_n[i]; 3837 } 3838 } 3839 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3840 ierr = PetscFree(nnz);CHKERRQ(ierr); 3841 3842 /* set values in constraint matrix */ 3843 for (i=0;i<total_primal_vertices;i++) { 3844 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3845 } 3846 total_counts = total_primal_vertices; 3847 for (i=n_vertices;i<total_counts_cc;i++) { 3848 if (!PetscBTLookup(change_basis,i)) { 3849 PetscInt *cols; 3850 3851 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3852 cols = constraints_idxs+constraints_idxs_ptr[i]; 3853 for (k=0;k<constraints_n[i];k++) { 3854 PetscInt row = total_counts+k; 3855 PetscScalar *vals; 3856 3857 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3858 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3859 } 3860 total_counts += constraints_n[i]; 3861 } 3862 } 3863 /* assembling */ 3864 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 3867 /* 3868 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3869 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3870 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3871 */ 3872 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3873 if (pcbddc->use_change_of_basis) { 3874 /* dual and primal dofs on a single cc */ 3875 PetscInt dual_dofs,primal_dofs; 3876 /* working stuff for GEQRF */ 3877 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3878 PetscBLASInt lqr_work; 3879 /* working stuff for UNGQR */ 3880 PetscScalar *gqr_work,lgqr_work_t; 3881 PetscBLASInt lgqr_work; 3882 /* working stuff for TRTRS */ 3883 PetscScalar *trs_rhs; 3884 PetscBLASInt Blas_NRHS; 3885 /* pointers for values insertion into change of basis matrix */ 3886 PetscInt *start_rows,*start_cols; 3887 PetscScalar *start_vals; 3888 /* working stuff for values insertion */ 3889 PetscBT is_primal; 3890 PetscInt *aux_primal_numbering_B; 3891 /* matrix sizes */ 3892 PetscInt global_size,local_size; 3893 /* temporary change of basis */ 3894 Mat localChangeOfBasisMatrix; 3895 /* extra space for debugging */ 3896 PetscScalar *dbg_work; 3897 3898 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3899 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3900 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3901 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3902 /* nonzeros for local mat */ 3903 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3904 if (!pcbddc->benign_change || pcbddc->fake_change) { 3905 for (i=0;i<pcis->n;i++) nnz[i]=1; 3906 } else { 3907 const PetscInt *ii; 3908 PetscInt n; 3909 PetscBool flg_row; 3910 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3911 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3912 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3913 } 3914 for (i=n_vertices;i<total_counts_cc;i++) { 3915 if (PetscBTLookup(change_basis,i)) { 3916 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3917 if (PetscBTLookup(qr_needed_idx,i)) { 3918 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3919 } else { 3920 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3921 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3922 } 3923 } 3924 } 3925 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3926 ierr = PetscFree(nnz);CHKERRQ(ierr); 3927 /* Set interior change in the matrix */ 3928 if (!pcbddc->benign_change || pcbddc->fake_change) { 3929 for (i=0;i<pcis->n;i++) { 3930 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3931 } 3932 } else { 3933 const PetscInt *ii,*jj; 3934 PetscScalar *aa; 3935 PetscInt n; 3936 PetscBool flg_row; 3937 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3938 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3939 for (i=0;i<n;i++) { 3940 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3941 } 3942 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3943 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3944 } 3945 3946 if (pcbddc->dbg_flag) { 3947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3949 } 3950 3951 3952 /* Now we loop on the constraints which need a change of basis */ 3953 /* 3954 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3955 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3956 3957 Basic blocks of change of basis matrix T computed by 3958 3959 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3960 3961 | 1 0 ... 0 s_1/S | 3962 | 0 1 ... 0 s_2/S | 3963 | ... | 3964 | 0 ... 1 s_{n-1}/S | 3965 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3966 3967 with S = \sum_{i=1}^n s_i^2 3968 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3969 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3970 3971 - QR decomposition of constraints otherwise 3972 */ 3973 if (qr_needed) { 3974 /* space to store Q */ 3975 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3976 /* first we issue queries for optimal work */ 3977 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 lqr_work = -1; 3981 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3982 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3983 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3984 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3985 lgqr_work = -1; 3986 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3989 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3990 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3991 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3993 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3994 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3995 /* array to store scaling factors for reflectors */ 3996 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3997 /* array to store rhs and solution of triangular solver */ 3998 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3999 /* allocating workspace for check */ 4000 if (pcbddc->dbg_flag) { 4001 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4002 } 4003 } 4004 /* array to store whether a node is primal or not */ 4005 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4006 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4007 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4008 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4009 for (i=0;i<total_primal_vertices;i++) { 4010 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4011 } 4012 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4013 4014 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4015 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4016 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4017 if (PetscBTLookup(change_basis,total_counts)) { 4018 /* get constraint info */ 4019 primal_dofs = constraints_n[total_counts]; 4020 dual_dofs = size_of_constraint-primal_dofs; 4021 4022 if (pcbddc->dbg_flag) { 4023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4024 } 4025 4026 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4027 4028 /* copy quadrature constraints for change of basis check */ 4029 if (pcbddc->dbg_flag) { 4030 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4031 } 4032 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4033 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4034 4035 /* compute QR decomposition of constraints */ 4036 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4037 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4039 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4040 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4041 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4042 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4043 4044 /* explictly compute R^-T */ 4045 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4046 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4047 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4048 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4050 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4051 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4052 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4053 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4054 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4055 4056 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4062 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4063 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4064 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4065 4066 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4067 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4068 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4069 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4071 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4072 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4073 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4075 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4076 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4077 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4078 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4079 4080 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4081 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4082 /* insert cols for primal dofs */ 4083 for (j=0;j<primal_dofs;j++) { 4084 start_vals = &qr_basis[j*size_of_constraint]; 4085 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4086 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4087 } 4088 /* insert cols for dual dofs */ 4089 for (j=0,k=0;j<dual_dofs;k++) { 4090 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4091 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4092 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4093 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4094 j++; 4095 } 4096 } 4097 4098 /* check change of basis */ 4099 if (pcbddc->dbg_flag) { 4100 PetscInt ii,jj; 4101 PetscBool valid_qr=PETSC_TRUE; 4102 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4104 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4106 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4107 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4108 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4109 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4110 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4111 for (jj=0;jj<size_of_constraint;jj++) { 4112 for (ii=0;ii<primal_dofs;ii++) { 4113 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4114 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4115 } 4116 } 4117 if (!valid_qr) { 4118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4119 for (jj=0;jj<size_of_constraint;jj++) { 4120 for (ii=0;ii<primal_dofs;ii++) { 4121 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4122 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4123 } 4124 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4125 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4126 } 4127 } 4128 } 4129 } else { 4130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4131 } 4132 } 4133 } else { /* simple transformation block */ 4134 PetscInt row,col; 4135 PetscScalar val,norm; 4136 4137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4138 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4139 for (j=0;j<size_of_constraint;j++) { 4140 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4141 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4142 if (!PetscBTLookup(is_primal,row_B)) { 4143 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4144 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4145 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4146 } else { 4147 for (k=0;k<size_of_constraint;k++) { 4148 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4149 if (row != col) { 4150 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4151 } else { 4152 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4153 } 4154 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4155 } 4156 } 4157 } 4158 if (pcbddc->dbg_flag) { 4159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4160 } 4161 } 4162 } else { 4163 if (pcbddc->dbg_flag) { 4164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4165 } 4166 } 4167 } 4168 4169 /* free workspace */ 4170 if (qr_needed) { 4171 if (pcbddc->dbg_flag) { 4172 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4173 } 4174 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4175 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4176 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4177 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4178 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4179 } 4180 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4181 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4182 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4183 4184 /* assembling of global change of variable */ 4185 if (!pcbddc->fake_change) { 4186 Mat tmat; 4187 PetscInt bs; 4188 4189 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4190 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4191 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4192 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4193 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4194 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4195 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4196 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4197 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4198 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4199 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4200 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4201 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4202 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4203 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4204 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4205 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4206 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4207 4208 /* check */ 4209 if (pcbddc->dbg_flag) { 4210 PetscReal error; 4211 Vec x,x_change; 4212 4213 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4214 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4215 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4216 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4217 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4218 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4219 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4220 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4221 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4222 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4223 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4224 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4227 ierr = VecDestroy(&x);CHKERRQ(ierr); 4228 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4229 } 4230 /* adapt sub_schurs computed (if any) */ 4231 if (pcbddc->use_deluxe_scaling) { 4232 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4233 4234 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4235 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4236 } 4237 if (sub_schurs->S_Ej_all) { 4238 Mat S_new,tmat; 4239 IS is_all_N,is_V_Sall = NULL; 4240 4241 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4242 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4243 if (pcbddc->deluxe_zerorows) { 4244 ISLocalToGlobalMapping NtoSall; 4245 IS is_V; 4246 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4247 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4248 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4249 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4250 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4251 } 4252 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4253 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4254 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4255 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4256 if (pcbddc->deluxe_zerorows) { 4257 const PetscScalar *array; 4258 const PetscInt *idxs_V,*idxs_all; 4259 PetscInt i,n_V; 4260 4261 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4262 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4263 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4264 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4265 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4266 for (i=0;i<n_V;i++) { 4267 PetscScalar val; 4268 PetscInt idx; 4269 4270 idx = idxs_V[i]; 4271 val = array[idxs_all[idxs_V[i]]]; 4272 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4273 } 4274 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4275 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4276 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4277 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4278 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4279 } 4280 sub_schurs->S_Ej_all = S_new; 4281 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4282 if (sub_schurs->sum_S_Ej_all) { 4283 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4284 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4285 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4286 if (pcbddc->deluxe_zerorows) { 4287 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4288 } 4289 sub_schurs->sum_S_Ej_all = S_new; 4290 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4291 } 4292 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4293 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4294 } 4295 /* destroy any change of basis context in sub_schurs */ 4296 if (sub_schurs->change) { 4297 PetscInt i; 4298 4299 for (i=0;i<sub_schurs->n_subs;i++) { 4300 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4301 } 4302 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4303 } 4304 } 4305 if (pcbddc->switch_static) { /* need to save the local change */ 4306 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4307 } else { 4308 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4309 } 4310 /* determine if any process has changed the pressures locally */ 4311 if (pcbddc->benign_saddle_point) { 4312 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4313 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4314 } 4315 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4316 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4317 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4318 pcbddc->use_qr_single = qr_needed; 4319 } 4320 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4321 PetscBool needglobal = PETSC_FALSE; 4322 if (pcbddc->benign_saddle_point) { 4323 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4324 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4325 } 4326 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4327 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4328 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4329 } else { 4330 Mat benign_global = NULL; 4331 if (needglobal) { 4332 Mat tmat; 4333 4334 pcbddc->change_interior = PETSC_TRUE; 4335 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4336 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4337 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4338 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4339 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4340 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4341 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4342 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4343 if (pcbddc->benign_change) { 4344 Mat M; 4345 4346 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4347 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4348 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4349 ierr = MatDestroy(&M);CHKERRQ(ierr); 4350 } else { 4351 Mat eye; 4352 PetscScalar *array; 4353 4354 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4355 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4356 for (i=0;i<pcis->n;i++) { 4357 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4358 } 4359 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4360 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4361 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4362 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4363 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4364 } 4365 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4366 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4367 } 4368 if (pcbddc->user_ChangeOfBasisMatrix) { 4369 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4370 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4371 } else if (needglobal) { 4372 pcbddc->ChangeOfBasisMatrix = benign_global; 4373 } 4374 } 4375 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4376 IS is_global; 4377 const PetscInt *gidxs; 4378 4379 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4380 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4381 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4382 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4383 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4384 } 4385 } 4386 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4387 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4388 } 4389 4390 if (!pcbddc->fake_change) { 4391 /* add pressure dofs to set of primal nodes for numbering purposes */ 4392 for (i=0;i<pcbddc->benign_n;i++) { 4393 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4394 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4395 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4396 pcbddc->local_primal_size_cc++; 4397 pcbddc->local_primal_size++; 4398 } 4399 4400 /* check if a new primal space has been introduced (also take into account benign trick) */ 4401 pcbddc->new_primal_space_local = PETSC_TRUE; 4402 if (olocal_primal_size == pcbddc->local_primal_size) { 4403 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4404 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4405 if (!pcbddc->new_primal_space_local) { 4406 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4407 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4408 } 4409 } 4410 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4411 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4412 } 4413 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4414 4415 /* flush dbg viewer */ 4416 if (pcbddc->dbg_flag) { 4417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4418 } 4419 4420 /* free workspace */ 4421 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4422 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4423 if (!pcbddc->adaptive_selection) { 4424 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4425 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4426 } else { 4427 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4428 pcbddc->adaptive_constraints_idxs_ptr, 4429 pcbddc->adaptive_constraints_data_ptr, 4430 pcbddc->adaptive_constraints_idxs, 4431 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4432 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4433 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4434 } 4435 PetscFunctionReturn(0); 4436 } 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4440 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4441 { 4442 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4443 PC_IS *pcis = (PC_IS*)pc->data; 4444 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4445 PetscInt ierr,i,N; 4446 PetscViewer viewer=pcbddc->dbg_viewer; 4447 4448 PetscFunctionBegin; 4449 /* Reset previously computed graph */ 4450 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4451 /* Init local Graph struct */ 4452 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4453 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4454 4455 /* Check validity of the csr graph passed in by the user */ 4456 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4458 } 4459 4460 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4461 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4462 PetscInt *xadj,*adjncy; 4463 PetscInt nvtxs; 4464 PetscBool flg_row=PETSC_FALSE; 4465 4466 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4467 if (flg_row) { 4468 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4469 pcbddc->computed_rowadj = PETSC_TRUE; 4470 } 4471 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4472 } 4473 if (pcbddc->dbg_flag) { 4474 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4475 } 4476 4477 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4478 if (pcbddc->user_provided_isfordofs) { 4479 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4480 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4481 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4482 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4483 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4484 } 4485 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4486 pcbddc->n_ISForDofs = 0; 4487 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4488 } 4489 } else { 4490 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4491 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4492 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4493 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4494 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4495 } 4496 } 4497 } 4498 4499 /* Setup of Graph */ 4500 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4501 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4502 } 4503 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4504 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4505 } 4506 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4507 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4508 } 4509 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4510 4511 /* attach info on disconnected subdomains if present */ 4512 if (pcbddc->n_local_subs) { 4513 PetscInt *local_subs; 4514 4515 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4516 for (i=0;i<pcbddc->n_local_subs;i++) { 4517 const PetscInt *idxs; 4518 PetscInt nl,j; 4519 4520 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4521 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4522 for (j=0;j<nl;j++) { 4523 local_subs[idxs[j]] = i; 4524 } 4525 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4526 } 4527 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4528 pcbddc->mat_graph->local_subs = local_subs; 4529 } 4530 4531 /* Graph's connected components analysis */ 4532 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4533 4534 /* print some info to stdout */ 4535 if (pcbddc->dbg_flag) { 4536 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4537 } 4538 PetscFunctionReturn(0); 4539 } 4540 4541 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4542 #undef __FUNCT__ 4543 #define __FUNCT__ "PCBDDCSubsetNumbering" 4544 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4545 { 4546 PetscSF sf; 4547 PetscLayout map; 4548 const PetscInt *idxs; 4549 PetscInt *leaf_data,*root_data,*gidxs; 4550 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4551 PetscInt n_n,nlocals,start,first_index; 4552 PetscMPIInt commsize; 4553 PetscBool first_found; 4554 PetscErrorCode ierr; 4555 4556 PetscFunctionBegin; 4557 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4558 if (subset_mult) { 4559 PetscCheckSameComm(subset,1,subset_mult,2); 4560 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4561 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4562 } 4563 /* create workspace layout for computing global indices of subset */ 4564 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4565 lbounds[0] = lbounds[1] = 0; 4566 for (i=0;i<n;i++) { 4567 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4568 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4569 } 4570 lbounds[0] = -lbounds[0]; 4571 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4572 gbounds[0] = -gbounds[0]; 4573 N = gbounds[1] - gbounds[0] + 1; 4574 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4575 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4576 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4577 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4578 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4579 4580 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4581 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4582 if (subset_mult) { 4583 const PetscInt* idxs_mult; 4584 4585 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4586 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4587 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4588 } else { 4589 for (i=0;i<n;i++) leaf_data[i] = 1; 4590 } 4591 /* local size of new subset */ 4592 n_n = 0; 4593 for (i=0;i<n;i++) n_n += leaf_data[i]; 4594 4595 /* global indexes in layout */ 4596 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4597 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4598 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4599 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4600 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4601 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4602 4603 /* reduce from leaves to roots */ 4604 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4605 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4606 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4607 4608 /* count indexes in local part of layout */ 4609 nlocals = 0; 4610 first_index = -1; 4611 first_found = PETSC_FALSE; 4612 for (i=0;i<Nl;i++) { 4613 if (!first_found && root_data[i]) { 4614 first_found = PETSC_TRUE; 4615 first_index = i; 4616 } 4617 nlocals += root_data[i]; 4618 } 4619 4620 /* cumulative of number of indexes and size of subset without holes */ 4621 #if defined(PETSC_HAVE_MPI_EXSCAN) 4622 start = 0; 4623 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4624 #else 4625 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4626 start = start-nlocals; 4627 #endif 4628 4629 if (N_n) { /* compute total size of new subset if requested */ 4630 *N_n = start + nlocals; 4631 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4632 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4633 } 4634 4635 /* adapt root data with cumulative */ 4636 if (first_found) { 4637 PetscInt old_index; 4638 4639 root_data[first_index] += start; 4640 old_index = first_index; 4641 for (i=first_index+1;i<Nl;i++) { 4642 if (root_data[i]) { 4643 root_data[i] += root_data[old_index]; 4644 old_index = i; 4645 } 4646 } 4647 } 4648 4649 /* from roots to leaves */ 4650 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4651 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4652 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4653 4654 /* create new IS with global indexes without holes */ 4655 if (subset_mult) { 4656 const PetscInt* idxs_mult; 4657 PetscInt cum; 4658 4659 cum = 0; 4660 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4661 for (i=0;i<n;i++) { 4662 PetscInt j; 4663 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4664 } 4665 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4666 } else { 4667 for (i=0;i<n;i++) { 4668 gidxs[i] = leaf_data[i]-1; 4669 } 4670 } 4671 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4672 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4673 PetscFunctionReturn(0); 4674 } 4675 4676 #undef __FUNCT__ 4677 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4678 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4679 { 4680 PetscInt i,j; 4681 PetscScalar *alphas; 4682 PetscErrorCode ierr; 4683 4684 PetscFunctionBegin; 4685 /* this implements stabilized Gram-Schmidt */ 4686 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4687 for (i=0;i<n;i++) { 4688 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4689 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4690 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4691 } 4692 ierr = PetscFree(alphas);CHKERRQ(ierr); 4693 PetscFunctionReturn(0); 4694 } 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "MatISGetSubassemblingPattern" 4698 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4699 { 4700 IS ranks_send_to; 4701 PetscInt n_neighs,*neighs,*n_shared,**shared; 4702 PetscMPIInt size,rank,color; 4703 PetscInt *xadj,*adjncy; 4704 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4705 PetscInt i,local_size,threshold=0; 4706 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4707 PetscSubcomm subcomm; 4708 PetscErrorCode ierr; 4709 4710 PetscFunctionBegin; 4711 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4712 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4713 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4714 4715 /* Get info on mapping */ 4716 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4717 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4718 4719 /* build local CSR graph of subdomains' connectivity */ 4720 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4721 xadj[0] = 0; 4722 xadj[1] = PetscMax(n_neighs-1,0); 4723 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4724 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4725 4726 if (threshold) { 4727 PetscInt xadj_count = 0; 4728 for (i=1;i<n_neighs;i++) { 4729 if (n_shared[i] > threshold) { 4730 adjncy[xadj_count] = neighs[i]; 4731 adjncy_wgt[xadj_count] = n_shared[i]; 4732 xadj_count++; 4733 } 4734 } 4735 xadj[1] = xadj_count; 4736 } else { 4737 if (xadj[1]) { 4738 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4739 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4740 } 4741 } 4742 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4743 if (use_square) { 4744 for (i=0;i<xadj[1];i++) { 4745 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4746 } 4747 } 4748 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4749 4750 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4751 4752 /* 4753 Restrict work on active processes only. 4754 */ 4755 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4756 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4757 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4758 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4759 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4760 if (color) { 4761 ierr = PetscFree(xadj);CHKERRQ(ierr); 4762 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4763 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4764 } else { 4765 Mat subdomain_adj; 4766 IS new_ranks,new_ranks_contig; 4767 MatPartitioning partitioner; 4768 PetscInt prank,rstart=0,rend=0; 4769 PetscInt *is_indices,*oldranks; 4770 PetscBool aggregate; 4771 4772 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4773 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4774 prank = rank; 4775 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4776 /* 4777 for (i=0;i<size;i++) { 4778 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4779 } 4780 */ 4781 for (i=0;i<xadj[1];i++) { 4782 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4783 } 4784 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4785 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4786 if (aggregate) { 4787 PetscInt lrows,row,ncols,*cols; 4788 PetscMPIInt nrank; 4789 PetscScalar *vals; 4790 4791 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4792 lrows = 0; 4793 if (nrank<redprocs) { 4794 lrows = size/redprocs; 4795 if (nrank<size%redprocs) lrows++; 4796 } 4797 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4798 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4799 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4800 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4801 row = nrank; 4802 ncols = xadj[1]-xadj[0]; 4803 cols = adjncy; 4804 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4805 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4806 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4807 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4808 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4809 ierr = PetscFree(xadj);CHKERRQ(ierr); 4810 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4811 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4812 ierr = PetscFree(vals);CHKERRQ(ierr); 4813 } else { 4814 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4815 } 4816 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4817 4818 /* Partition */ 4819 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4820 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4821 if (use_vwgt) { 4822 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4823 v_wgt[0] = local_size; 4824 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4825 } 4826 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4827 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4828 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4829 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4830 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4831 4832 /* renumber new_ranks to avoid "holes" in new set of processors */ 4833 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4834 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4835 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4836 if (!redprocs) { 4837 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4838 } else { 4839 PetscInt idxs[1]; 4840 PetscMPIInt tag; 4841 MPI_Request *reqs; 4842 4843 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4844 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4845 for (i=rstart;i<rend;i++) { 4846 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4847 } 4848 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4849 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4850 ierr = PetscFree(reqs);CHKERRQ(ierr); 4851 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4852 } 4853 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4854 /* clean up */ 4855 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4856 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4857 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4858 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4859 } 4860 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4861 4862 /* assemble parallel IS for sends */ 4863 i = 1; 4864 if (color) i=0; 4865 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4866 /* get back IS */ 4867 *is_sends = ranks_send_to; 4868 PetscFunctionReturn(0); 4869 } 4870 4871 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4872 4873 #undef __FUNCT__ 4874 #define __FUNCT__ "MatISSubassemble" 4875 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4876 { 4877 Mat local_mat; 4878 IS is_sends_internal; 4879 PetscInt rows,cols,new_local_rows; 4880 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4881 PetscBool ismatis,isdense,newisdense,destroy_mat; 4882 ISLocalToGlobalMapping l2gmap; 4883 PetscInt* l2gmap_indices; 4884 const PetscInt* is_indices; 4885 MatType new_local_type; 4886 /* buffers */ 4887 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4888 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4889 PetscInt *recv_buffer_idxs_local; 4890 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4891 /* MPI */ 4892 MPI_Comm comm,comm_n; 4893 PetscSubcomm subcomm; 4894 PetscMPIInt n_sends,n_recvs,commsize; 4895 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4896 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4897 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4898 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4899 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4900 PetscErrorCode ierr; 4901 4902 PetscFunctionBegin; 4903 /* TODO: add missing checks */ 4904 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4905 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4906 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4907 PetscValidLogicalCollectiveInt(mat,nis,7); 4908 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4909 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4910 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4911 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4912 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4913 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4914 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4915 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4916 PetscInt mrows,mcols,mnrows,mncols; 4917 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4918 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4919 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4920 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4921 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4922 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4923 } 4924 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4925 PetscValidLogicalCollectiveInt(mat,bs,0); 4926 /* prepare IS for sending if not provided */ 4927 if (!is_sends) { 4928 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4929 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4930 } else { 4931 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4932 is_sends_internal = is_sends; 4933 } 4934 4935 /* get comm */ 4936 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4937 4938 /* compute number of sends */ 4939 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4940 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4941 4942 /* compute number of receives */ 4943 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4944 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4945 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4946 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4947 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4948 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4949 ierr = PetscFree(iflags);CHKERRQ(ierr); 4950 4951 /* restrict comm if requested */ 4952 subcomm = 0; 4953 destroy_mat = PETSC_FALSE; 4954 if (restrict_comm) { 4955 PetscMPIInt color,subcommsize; 4956 4957 color = 0; 4958 if (restrict_full) { 4959 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4960 } else { 4961 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4962 } 4963 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4964 subcommsize = commsize - subcommsize; 4965 /* check if reuse has been requested */ 4966 if (reuse == MAT_REUSE_MATRIX) { 4967 if (*mat_n) { 4968 PetscMPIInt subcommsize2; 4969 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4970 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4971 comm_n = PetscObjectComm((PetscObject)*mat_n); 4972 } else { 4973 comm_n = PETSC_COMM_SELF; 4974 } 4975 } else { /* MAT_INITIAL_MATRIX */ 4976 PetscMPIInt rank; 4977 4978 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4979 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4980 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4981 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4982 comm_n = PetscSubcommChild(subcomm); 4983 } 4984 /* flag to destroy *mat_n if not significative */ 4985 if (color) destroy_mat = PETSC_TRUE; 4986 } else { 4987 comm_n = comm; 4988 } 4989 4990 /* prepare send/receive buffers */ 4991 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4992 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4993 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4994 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4995 if (nis) { 4996 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4997 } 4998 4999 /* Get data from local matrices */ 5000 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5001 /* TODO: See below some guidelines on how to prepare the local buffers */ 5002 /* 5003 send_buffer_vals should contain the raw values of the local matrix 5004 send_buffer_idxs should contain: 5005 - MatType_PRIVATE type 5006 - PetscInt size_of_l2gmap 5007 - PetscInt global_row_indices[size_of_l2gmap] 5008 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5009 */ 5010 else { 5011 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5012 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5013 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5014 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5015 send_buffer_idxs[1] = i; 5016 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5017 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5018 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5019 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5020 for (i=0;i<n_sends;i++) { 5021 ilengths_vals[is_indices[i]] = len*len; 5022 ilengths_idxs[is_indices[i]] = len+2; 5023 } 5024 } 5025 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5026 /* additional is (if any) */ 5027 if (nis) { 5028 PetscMPIInt psum; 5029 PetscInt j; 5030 for (j=0,psum=0;j<nis;j++) { 5031 PetscInt plen; 5032 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5033 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5034 psum += len+1; /* indices + lenght */ 5035 } 5036 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5037 for (j=0,psum=0;j<nis;j++) { 5038 PetscInt plen; 5039 const PetscInt *is_array_idxs; 5040 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5041 send_buffer_idxs_is[psum] = plen; 5042 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5043 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5044 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5045 psum += plen+1; /* indices + lenght */ 5046 } 5047 for (i=0;i<n_sends;i++) { 5048 ilengths_idxs_is[is_indices[i]] = psum; 5049 } 5050 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5051 } 5052 5053 buf_size_idxs = 0; 5054 buf_size_vals = 0; 5055 buf_size_idxs_is = 0; 5056 for (i=0;i<n_recvs;i++) { 5057 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5058 buf_size_vals += (PetscInt)olengths_vals[i]; 5059 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5060 } 5061 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5062 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5063 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5064 5065 /* get new tags for clean communications */ 5066 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5067 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5068 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5069 5070 /* allocate for requests */ 5071 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5072 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5073 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5075 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5076 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5077 5078 /* communications */ 5079 ptr_idxs = recv_buffer_idxs; 5080 ptr_vals = recv_buffer_vals; 5081 ptr_idxs_is = recv_buffer_idxs_is; 5082 for (i=0;i<n_recvs;i++) { 5083 source_dest = onodes[i]; 5084 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5085 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5086 ptr_idxs += olengths_idxs[i]; 5087 ptr_vals += olengths_vals[i]; 5088 if (nis) { 5089 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5090 ptr_idxs_is += olengths_idxs_is[i]; 5091 } 5092 } 5093 for (i=0;i<n_sends;i++) { 5094 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5095 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5096 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5097 if (nis) { 5098 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5099 } 5100 } 5101 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5102 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5103 5104 /* assemble new l2g map */ 5105 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5106 ptr_idxs = recv_buffer_idxs; 5107 new_local_rows = 0; 5108 for (i=0;i<n_recvs;i++) { 5109 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5110 ptr_idxs += olengths_idxs[i]; 5111 } 5112 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5113 ptr_idxs = recv_buffer_idxs; 5114 new_local_rows = 0; 5115 for (i=0;i<n_recvs;i++) { 5116 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5117 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5118 ptr_idxs += olengths_idxs[i]; 5119 } 5120 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5121 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5122 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5123 5124 /* infer new local matrix type from received local matrices type */ 5125 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5126 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5127 if (n_recvs) { 5128 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5129 ptr_idxs = recv_buffer_idxs; 5130 for (i=0;i<n_recvs;i++) { 5131 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5132 new_local_type_private = MATAIJ_PRIVATE; 5133 break; 5134 } 5135 ptr_idxs += olengths_idxs[i]; 5136 } 5137 switch (new_local_type_private) { 5138 case MATDENSE_PRIVATE: 5139 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5140 new_local_type = MATSEQAIJ; 5141 bs = 1; 5142 } else { /* if I receive only 1 dense matrix */ 5143 new_local_type = MATSEQDENSE; 5144 bs = 1; 5145 } 5146 break; 5147 case MATAIJ_PRIVATE: 5148 new_local_type = MATSEQAIJ; 5149 bs = 1; 5150 break; 5151 case MATBAIJ_PRIVATE: 5152 new_local_type = MATSEQBAIJ; 5153 break; 5154 case MATSBAIJ_PRIVATE: 5155 new_local_type = MATSEQSBAIJ; 5156 break; 5157 default: 5158 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5159 break; 5160 } 5161 } else { /* by default, new_local_type is seqdense */ 5162 new_local_type = MATSEQDENSE; 5163 bs = 1; 5164 } 5165 5166 /* create MATIS object if needed */ 5167 if (reuse == MAT_INITIAL_MATRIX) { 5168 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5169 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5170 } else { 5171 /* it also destroys the local matrices */ 5172 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5173 } 5174 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5175 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5176 5177 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5178 5179 /* Global to local map of received indices */ 5180 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5181 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5182 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5183 5184 /* restore attributes -> type of incoming data and its size */ 5185 buf_size_idxs = 0; 5186 for (i=0;i<n_recvs;i++) { 5187 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5188 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5189 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5190 } 5191 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5192 5193 /* set preallocation */ 5194 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5195 if (!newisdense) { 5196 PetscInt *new_local_nnz=0; 5197 5198 ptr_vals = recv_buffer_vals; 5199 ptr_idxs = recv_buffer_idxs_local; 5200 if (n_recvs) { 5201 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5202 } 5203 for (i=0;i<n_recvs;i++) { 5204 PetscInt j; 5205 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5206 for (j=0;j<*(ptr_idxs+1);j++) { 5207 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5208 } 5209 } else { 5210 /* TODO */ 5211 } 5212 ptr_idxs += olengths_idxs[i]; 5213 } 5214 if (new_local_nnz) { 5215 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5216 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5217 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5218 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5219 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5220 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5221 } else { 5222 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5223 } 5224 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5225 } else { 5226 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5227 } 5228 5229 /* set values */ 5230 ptr_vals = recv_buffer_vals; 5231 ptr_idxs = recv_buffer_idxs_local; 5232 for (i=0;i<n_recvs;i++) { 5233 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5234 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5235 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5236 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5237 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5238 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5239 } else { 5240 /* TODO */ 5241 } 5242 ptr_idxs += olengths_idxs[i]; 5243 ptr_vals += olengths_vals[i]; 5244 } 5245 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5246 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5247 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5248 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5249 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5250 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5251 5252 #if 0 5253 if (!restrict_comm) { /* check */ 5254 Vec lvec,rvec; 5255 PetscReal infty_error; 5256 5257 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5258 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5259 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5260 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5261 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5262 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5263 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5264 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5265 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5266 } 5267 #endif 5268 5269 /* assemble new additional is (if any) */ 5270 if (nis) { 5271 PetscInt **temp_idxs,*count_is,j,psum; 5272 5273 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5274 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5275 ptr_idxs = recv_buffer_idxs_is; 5276 psum = 0; 5277 for (i=0;i<n_recvs;i++) { 5278 for (j=0;j<nis;j++) { 5279 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5280 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5281 psum += plen; 5282 ptr_idxs += plen+1; /* shift pointer to received data */ 5283 } 5284 } 5285 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5286 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5287 for (i=1;i<nis;i++) { 5288 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5289 } 5290 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5291 ptr_idxs = recv_buffer_idxs_is; 5292 for (i=0;i<n_recvs;i++) { 5293 for (j=0;j<nis;j++) { 5294 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5295 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5296 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5297 ptr_idxs += plen+1; /* shift pointer to received data */ 5298 } 5299 } 5300 for (i=0;i<nis;i++) { 5301 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5302 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5303 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5304 } 5305 ierr = PetscFree(count_is);CHKERRQ(ierr); 5306 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5307 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5308 } 5309 /* free workspace */ 5310 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5311 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5312 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5313 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5314 if (isdense) { 5315 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5316 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5317 } else { 5318 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5319 } 5320 if (nis) { 5321 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5322 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5323 } 5324 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5325 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5326 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5327 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5328 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5329 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5330 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5331 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5332 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5333 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5334 ierr = PetscFree(onodes);CHKERRQ(ierr); 5335 if (nis) { 5336 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5337 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5338 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5339 } 5340 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5341 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5342 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5343 for (i=0;i<nis;i++) { 5344 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5345 } 5346 *mat_n = NULL; 5347 } 5348 PetscFunctionReturn(0); 5349 } 5350 5351 /* temporary hack into ksp private data structure */ 5352 #include <petsc/private/kspimpl.h> 5353 5354 #undef __FUNCT__ 5355 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5356 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5357 { 5358 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5359 PC_IS *pcis = (PC_IS*)pc->data; 5360 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5361 MatNullSpace CoarseNullSpace=NULL; 5362 ISLocalToGlobalMapping coarse_islg; 5363 IS coarse_is,*isarray; 5364 PetscInt i,im_active=-1,active_procs=-1; 5365 PetscInt nis,nisdofs,nisneu,nisvert; 5366 PC pc_temp; 5367 PCType coarse_pc_type; 5368 KSPType coarse_ksp_type; 5369 PetscBool multilevel_requested,multilevel_allowed; 5370 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5371 Mat t_coarse_mat_is; 5372 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5373 PetscMPIInt all_procs; 5374 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5375 PetscBool compute_vecs = PETSC_FALSE; 5376 PetscScalar *array; 5377 PetscErrorCode ierr; 5378 5379 PetscFunctionBegin; 5380 /* Assign global numbering to coarse dofs */ 5381 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5382 PetscInt ocoarse_size; 5383 compute_vecs = PETSC_TRUE; 5384 ocoarse_size = pcbddc->coarse_size; 5385 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5386 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5387 /* see if we can avoid some work */ 5388 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5389 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5390 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5391 PC pc; 5392 PetscBool isbddc; 5393 5394 /* temporary workaround since PCBDDC does not have a reset method so far */ 5395 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5396 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5397 if (isbddc) { 5398 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5399 } else { 5400 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5401 } 5402 coarse_reuse = PETSC_FALSE; 5403 } else { /* we can safely reuse already computed coarse matrix */ 5404 coarse_reuse = PETSC_TRUE; 5405 } 5406 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5407 coarse_reuse = PETSC_FALSE; 5408 } 5409 /* reset any subassembling information */ 5410 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5411 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5412 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5413 coarse_reuse = PETSC_TRUE; 5414 } 5415 5416 /* count "active" (i.e. with positive local size) and "void" processes */ 5417 im_active = !!(pcis->n); 5418 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5419 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5420 void_procs = all_procs-active_procs; 5421 csin_type_simple = PETSC_TRUE; 5422 redist = PETSC_FALSE; 5423 if (pcbddc->current_level && void_procs) { 5424 csin_ml = PETSC_TRUE; 5425 ncoarse_ml = void_procs; 5426 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5427 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5428 csin_ds = PETSC_TRUE; 5429 ncoarse_ds = pcbddc->redistribute_coarse; 5430 redist = PETSC_TRUE; 5431 } else { 5432 csin_ds = PETSC_TRUE; 5433 ncoarse_ds = active_procs; 5434 redist = PETSC_TRUE; 5435 } 5436 } else { 5437 csin_ml = PETSC_FALSE; 5438 ncoarse_ml = all_procs; 5439 if (void_procs) { 5440 csin_ds = PETSC_TRUE; 5441 ncoarse_ds = void_procs; 5442 csin_type_simple = PETSC_FALSE; 5443 } else { 5444 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5445 csin_ds = PETSC_TRUE; 5446 ncoarse_ds = pcbddc->redistribute_coarse; 5447 redist = PETSC_TRUE; 5448 } else { 5449 csin_ds = PETSC_FALSE; 5450 ncoarse_ds = all_procs; 5451 } 5452 } 5453 } 5454 5455 /* 5456 test if we can go multilevel: three conditions must be satisfied: 5457 - we have not exceeded the number of levels requested 5458 - we can actually subassemble the active processes 5459 - we can find a suitable number of MPI processes where we can place the subassembled problem 5460 */ 5461 multilevel_allowed = PETSC_FALSE; 5462 multilevel_requested = PETSC_FALSE; 5463 if (pcbddc->current_level < pcbddc->max_levels) { 5464 multilevel_requested = PETSC_TRUE; 5465 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5466 multilevel_allowed = PETSC_FALSE; 5467 } else { 5468 multilevel_allowed = PETSC_TRUE; 5469 } 5470 } 5471 /* determine number of process partecipating to coarse solver */ 5472 if (multilevel_allowed) { 5473 ncoarse = ncoarse_ml; 5474 csin = csin_ml; 5475 redist = PETSC_FALSE; 5476 } else { 5477 ncoarse = ncoarse_ds; 5478 csin = csin_ds; 5479 } 5480 5481 /* creates temporary l2gmap and IS for coarse indexes */ 5482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5483 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5484 5485 /* creates temporary MATIS object for coarse matrix */ 5486 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5487 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5488 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5489 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5490 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5491 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5492 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5495 5496 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5497 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5498 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5499 const PetscInt *idxs; 5500 ISLocalToGlobalMapping tmap; 5501 5502 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5503 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5504 /* allocate space for temporary storage */ 5505 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5506 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5507 /* allocate for IS array */ 5508 nisdofs = pcbddc->n_ISForDofsLocal; 5509 nisneu = !!pcbddc->NeumannBoundariesLocal; 5510 nisvert = 0; 5511 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5512 nisvert = 1; 5513 } 5514 nis = nisdofs + nisneu + nisvert; 5515 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5516 /* dofs splitting */ 5517 for (i=0;i<nisdofs;i++) { 5518 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5519 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5520 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5521 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5522 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5523 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5524 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5525 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5526 } 5527 /* neumann boundaries */ 5528 if (pcbddc->NeumannBoundariesLocal) { 5529 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5530 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5531 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5532 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5533 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5534 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5535 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5536 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5537 } 5538 /* primal vertices (benign) */ 5539 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5540 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5541 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5542 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5543 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5544 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5545 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5546 } 5547 /* free memory */ 5548 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5549 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5550 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5551 } else { 5552 nis = 0; 5553 nisdofs = 0; 5554 nisneu = 0; 5555 nisvert = 0; 5556 isarray = NULL; 5557 } 5558 /* destroy no longer needed map */ 5559 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5560 5561 /* restrict on coarse candidates (if needed) */ 5562 coarse_mat_is = NULL; 5563 if (csin) { 5564 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5565 if (redist) { 5566 PetscMPIInt rank; 5567 PetscInt spc,n_spc_p1,dest[1],destsize; 5568 5569 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5570 spc = active_procs/ncoarse; 5571 n_spc_p1 = active_procs%ncoarse; 5572 if (im_active) { 5573 destsize = 1; 5574 if (rank > n_spc_p1*(spc+1)-1) { 5575 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5576 } else { 5577 dest[0] = rank/(spc+1); 5578 } 5579 } else { 5580 destsize = 0; 5581 } 5582 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5583 } else if (csin_type_simple) { 5584 PetscMPIInt rank; 5585 PetscInt issize,isidx; 5586 5587 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5588 if (im_active) { 5589 issize = 1; 5590 isidx = (PetscInt)rank; 5591 } else { 5592 issize = 0; 5593 isidx = -1; 5594 } 5595 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5596 } else { /* get a suitable subassembling pattern from MATIS code */ 5597 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5598 } 5599 5600 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5601 if (!redist || ncoarse <= void_procs) { 5602 PetscInt ncoarse_cand,tissize,*nisindices; 5603 PetscInt *coarse_candidates; 5604 const PetscInt* tisindices; 5605 5606 /* get coarse candidates' ranks in pc communicator */ 5607 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5608 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5609 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5610 if (!coarse_candidates[i]) { 5611 coarse_candidates[ncoarse_cand++]=i; 5612 } 5613 } 5614 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5615 5616 5617 if (pcbddc->dbg_flag) { 5618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5619 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5620 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5621 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5622 for (i=0;i<ncoarse_cand;i++) { 5623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5624 } 5625 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5626 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5627 } 5628 /* shift the pattern on coarse candidates */ 5629 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5630 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5631 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5632 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5633 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5634 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5635 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5636 } 5637 if (pcbddc->dbg_flag) { 5638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5639 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5640 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5641 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5642 } 5643 } 5644 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5645 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5646 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5647 } else { /* this is the last level, so use just receiving processes in subcomm */ 5648 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5649 } 5650 } else { 5651 if (pcbddc->dbg_flag) { 5652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5655 } 5656 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5657 coarse_mat_is = t_coarse_mat_is; 5658 } 5659 5660 /* create local to global scatters for coarse problem */ 5661 if (compute_vecs) { 5662 PetscInt lrows; 5663 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5664 if (coarse_mat_is) { 5665 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5666 } else { 5667 lrows = 0; 5668 } 5669 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5670 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5671 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5672 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5673 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5674 } 5675 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5676 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5677 5678 /* set defaults for coarse KSP and PC */ 5679 if (multilevel_allowed) { 5680 coarse_ksp_type = KSPRICHARDSON; 5681 coarse_pc_type = PCBDDC; 5682 } else { 5683 coarse_ksp_type = KSPPREONLY; 5684 coarse_pc_type = PCREDUNDANT; 5685 } 5686 5687 /* print some info if requested */ 5688 if (pcbddc->dbg_flag) { 5689 if (!multilevel_allowed) { 5690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5691 if (multilevel_requested) { 5692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5693 } else if (pcbddc->max_levels) { 5694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5695 } 5696 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5697 } 5698 } 5699 5700 /* create the coarse KSP object only once with defaults */ 5701 if (coarse_mat_is) { 5702 MatReuse coarse_mat_reuse; 5703 PetscViewer dbg_viewer = NULL; 5704 if (pcbddc->dbg_flag) { 5705 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5706 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5707 } 5708 if (!pcbddc->coarse_ksp) { 5709 char prefix[256],str_level[16]; 5710 size_t len; 5711 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5712 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5713 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5714 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5715 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5716 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5717 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5718 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5719 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5720 /* prefix */ 5721 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5722 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5723 if (!pcbddc->current_level) { 5724 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5725 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5726 } else { 5727 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5728 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5729 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5730 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5731 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5732 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5733 } 5734 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5735 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5736 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5737 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5738 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5739 /* allow user customization */ 5740 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5741 } 5742 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5743 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5744 if (nisdofs) { 5745 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5746 for (i=0;i<nisdofs;i++) { 5747 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5748 } 5749 } 5750 if (nisneu) { 5751 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5752 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5753 } 5754 if (nisvert) { 5755 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5756 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5757 } 5758 5759 /* get some info after set from options */ 5760 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5761 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5762 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5763 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5764 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5765 isbddc = PETSC_FALSE; 5766 } 5767 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5768 if (isredundant) { 5769 KSP inner_ksp; 5770 PC inner_pc; 5771 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5772 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5773 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5774 } 5775 5776 /* assemble coarse matrix */ 5777 if (coarse_reuse) { 5778 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5779 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5780 coarse_mat_reuse = MAT_REUSE_MATRIX; 5781 } else { 5782 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5783 } 5784 if (isbddc || isnn) { 5785 if (isbddc) { /* currently there are no APIs for these options */ 5786 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5787 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5788 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5789 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5790 if (pcbddc_coarse->benign_compute_nonetflux) { 5791 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5792 } 5793 } 5794 if (pcbddc->coarsening_ratio > 1) { 5795 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5796 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5797 if (pcbddc->dbg_flag) { 5798 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5799 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5800 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5801 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5802 } 5803 } 5804 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5805 } else { 5806 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5807 coarse_mat = coarse_mat_is; 5808 } 5809 } else { 5810 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5811 } 5812 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5813 5814 /* propagate symmetry info of coarse matrix */ 5815 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5816 if (pc->pmat->symmetric_set) { 5817 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5818 } 5819 if (pc->pmat->hermitian_set) { 5820 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5821 } 5822 if (pc->pmat->spd_set) { 5823 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5824 } 5825 /* set operators */ 5826 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5827 if (pcbddc->dbg_flag) { 5828 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5829 } 5830 } else { /* processes non partecipating to coarse solver (if any) */ 5831 coarse_mat = 0; 5832 } 5833 ierr = PetscFree(isarray);CHKERRQ(ierr); 5834 #if 0 5835 { 5836 PetscViewer viewer; 5837 char filename[256]; 5838 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5839 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5840 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5841 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5842 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5843 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5844 } 5845 #endif 5846 5847 /* Compute coarse null space (special handling by BDDC only) */ 5848 #if 0 5849 if (pcbddc->NullSpace) { 5850 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5851 } 5852 #endif 5853 /* hack */ 5854 if (pcbddc->coarse_ksp) { 5855 Vec crhs,csol; 5856 5857 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5858 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5859 if (!csol) { 5860 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5861 } 5862 if (!crhs) { 5863 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5864 } 5865 } 5866 5867 /* compute null space for coarse solver if the benign trick has been requested */ 5868 if (pcbddc->benign_null) { 5869 5870 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5871 for (i=0;i<pcbddc->benign_n;i++) { 5872 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5873 } 5874 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5875 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5876 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5877 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5878 if (coarse_mat) { 5879 Vec nullv; 5880 PetscScalar *array,*array2; 5881 PetscInt nl; 5882 5883 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5884 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5885 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5886 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5887 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5888 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5889 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5890 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5891 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5892 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5893 } 5894 } 5895 5896 if (pcbddc->coarse_ksp) { 5897 PetscBool ispreonly; 5898 5899 if (CoarseNullSpace) { 5900 PetscBool isnull; 5901 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5902 if (0) { 5903 if (isbddc && !pcbddc->benign_saddle_point) { 5904 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5905 } else { 5906 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5907 } 5908 } else { 5909 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5910 } 5911 } 5912 /* setup coarse ksp */ 5913 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5914 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5915 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5916 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5917 KSP check_ksp; 5918 KSPType check_ksp_type; 5919 PC check_pc; 5920 Vec check_vec,coarse_vec; 5921 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5922 PetscInt its; 5923 PetscBool compute_eigs; 5924 PetscReal *eigs_r,*eigs_c; 5925 PetscInt neigs; 5926 const char *prefix; 5927 5928 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5929 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5930 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5931 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5932 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5933 if (ispreonly) { 5934 check_ksp_type = KSPPREONLY; 5935 compute_eigs = PETSC_FALSE; 5936 } else { 5937 check_ksp_type = KSPGMRES; 5938 compute_eigs = PETSC_TRUE; 5939 } 5940 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5941 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5942 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5943 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5944 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5945 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5946 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5947 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5948 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5949 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5950 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5951 /* create random vec */ 5952 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5953 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5954 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5955 if (CoarseNullSpace) { 5956 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5957 } 5958 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5959 /* solve coarse problem */ 5960 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5961 if (CoarseNullSpace) { 5962 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5963 } 5964 /* set eigenvalue estimation if preonly has not been requested */ 5965 if (compute_eigs) { 5966 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5967 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5968 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5969 lambda_max = eigs_r[neigs-1]; 5970 lambda_min = eigs_r[0]; 5971 if (pcbddc->use_coarse_estimates) { 5972 if (lambda_max>lambda_min) { 5973 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5974 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5975 } 5976 } 5977 } 5978 5979 /* check coarse problem residual error */ 5980 if (pcbddc->dbg_flag) { 5981 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5982 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5983 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5984 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5985 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5986 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5987 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5988 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5989 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5990 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5992 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5993 if (CoarseNullSpace) { 5994 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5995 } 5996 if (compute_eigs) { 5997 PetscReal lambda_max_s,lambda_min_s; 5998 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5999 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6000 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6001 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6002 for (i=0;i<neigs;i++) { 6003 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6004 } 6005 } 6006 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6007 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6008 } 6009 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6010 if (compute_eigs) { 6011 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6012 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6013 } 6014 } 6015 } 6016 /* print additional info */ 6017 if (pcbddc->dbg_flag) { 6018 /* waits until all processes reaches this point */ 6019 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6020 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6021 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6022 } 6023 6024 /* free memory */ 6025 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6026 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6027 PetscFunctionReturn(0); 6028 } 6029 6030 #undef __FUNCT__ 6031 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6032 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6033 { 6034 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6035 PC_IS* pcis = (PC_IS*)pc->data; 6036 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6037 IS subset,subset_mult,subset_n; 6038 PetscInt local_size,coarse_size=0; 6039 PetscInt *local_primal_indices=NULL; 6040 const PetscInt *t_local_primal_indices; 6041 PetscErrorCode ierr; 6042 6043 PetscFunctionBegin; 6044 /* Compute global number of coarse dofs */ 6045 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6047 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6048 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6049 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6050 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6051 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6052 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6053 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6054 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6055 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6056 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6057 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6058 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6059 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6060 6061 /* check numbering */ 6062 if (pcbddc->dbg_flag) { 6063 PetscScalar coarsesum,*array,*array2; 6064 PetscInt i; 6065 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6066 6067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6070 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6071 /* counter */ 6072 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6073 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6074 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6075 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6076 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6077 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6078 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6079 for (i=0;i<pcbddc->local_primal_size;i++) { 6080 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6081 } 6082 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6083 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6084 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6085 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6086 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6087 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6088 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6089 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6090 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6091 for (i=0;i<pcis->n;i++) { 6092 if (array[i] != 0.0 && array[i] != array2[i]) { 6093 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6094 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6095 set_error = PETSC_TRUE; 6096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6097 } 6098 } 6099 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6100 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6102 for (i=0;i<pcis->n;i++) { 6103 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6104 } 6105 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6106 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6107 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6108 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6109 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6111 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6112 PetscInt *gidxs; 6113 6114 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6115 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6119 for (i=0;i<pcbddc->local_primal_size;i++) { 6120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6121 } 6122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6123 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6124 } 6125 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6126 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6127 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6128 } 6129 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6130 /* get back data */ 6131 *coarse_size_n = coarse_size; 6132 *local_primal_indices_n = local_primal_indices; 6133 PetscFunctionReturn(0); 6134 } 6135 6136 #undef __FUNCT__ 6137 #define __FUNCT__ "PCBDDCGlobalToLocal" 6138 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6139 { 6140 IS localis_t; 6141 PetscInt i,lsize,*idxs,n; 6142 PetscScalar *vals; 6143 PetscErrorCode ierr; 6144 6145 PetscFunctionBegin; 6146 /* get indices in local ordering exploiting local to global map */ 6147 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6148 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6149 for (i=0;i<lsize;i++) vals[i] = 1.0; 6150 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6151 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6152 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6153 if (idxs) { /* multilevel guard */ 6154 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6155 } 6156 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6157 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6158 ierr = PetscFree(vals);CHKERRQ(ierr); 6159 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6160 /* now compute set in local ordering */ 6161 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6162 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6163 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6164 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6165 for (i=0,lsize=0;i<n;i++) { 6166 if (PetscRealPart(vals[i]) > 0.5) { 6167 lsize++; 6168 } 6169 } 6170 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6171 for (i=0,lsize=0;i<n;i++) { 6172 if (PetscRealPart(vals[i]) > 0.5) { 6173 idxs[lsize++] = i; 6174 } 6175 } 6176 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6178 *localis = localis_t; 6179 PetscFunctionReturn(0); 6180 } 6181 6182 #undef __FUNCT__ 6183 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6184 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6185 { 6186 PC_IS *pcis=(PC_IS*)pc->data; 6187 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6188 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6189 Mat S_j; 6190 PetscInt *used_xadj,*used_adjncy; 6191 PetscBool free_used_adj; 6192 PetscErrorCode ierr; 6193 6194 PetscFunctionBegin; 6195 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6196 free_used_adj = PETSC_FALSE; 6197 if (pcbddc->sub_schurs_layers == -1) { 6198 used_xadj = NULL; 6199 used_adjncy = NULL; 6200 } else { 6201 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6202 used_xadj = pcbddc->mat_graph->xadj; 6203 used_adjncy = pcbddc->mat_graph->adjncy; 6204 } else if (pcbddc->computed_rowadj) { 6205 used_xadj = pcbddc->mat_graph->xadj; 6206 used_adjncy = pcbddc->mat_graph->adjncy; 6207 } else { 6208 PetscBool flg_row=PETSC_FALSE; 6209 const PetscInt *xadj,*adjncy; 6210 PetscInt nvtxs; 6211 6212 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6213 if (flg_row) { 6214 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6215 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6216 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6217 free_used_adj = PETSC_TRUE; 6218 } else { 6219 pcbddc->sub_schurs_layers = -1; 6220 used_xadj = NULL; 6221 used_adjncy = NULL; 6222 } 6223 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6224 } 6225 } 6226 6227 /* setup sub_schurs data */ 6228 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6229 if (!sub_schurs->schur_explicit) { 6230 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6231 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6232 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6233 } else { 6234 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6235 PetscBool isseqaij; 6236 PetscInt benign_n; 6237 Mat change = NULL; 6238 Vec scaling = NULL; 6239 IS change_primal = NULL; 6240 6241 if (!pcbddc->use_vertices && reuse_solvers) { 6242 PetscInt n_vertices; 6243 6244 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6245 reuse_solvers = (PetscBool)!n_vertices; 6246 } 6247 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6248 if (!isseqaij) { 6249 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6250 if (matis->A == pcbddc->local_mat) { 6251 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6252 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6253 } else { 6254 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6255 } 6256 } 6257 if (!pcbddc->benign_change_explicit) { 6258 benign_n = pcbddc->benign_n; 6259 } else { 6260 benign_n = 0; 6261 } 6262 /* If the user defines additional constraints, we import them here. 6263 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6264 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6265 PC_IS *pcisf; 6266 PC_BDDC *pcbddcf; 6267 PC pcf; 6268 6269 if (pcbddc->sub_schurs_rebuild) { 6270 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6271 } 6272 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6273 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6274 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6275 /* hacks */ 6276 pcisf = (PC_IS*)pcf->data; 6277 pcisf->is_B_local = pcis->is_B_local; 6278 pcisf->vec1_N = pcis->vec1_N; 6279 pcisf->BtoNmap = pcis->BtoNmap; 6280 pcisf->n = pcis->n; 6281 pcisf->n_B = pcis->n_B; 6282 pcbddcf = (PC_BDDC*)pcf->data; 6283 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6284 pcbddcf->mat_graph = pcbddc->mat_graph; 6285 pcbddcf->use_faces = PETSC_TRUE; 6286 pcbddcf->use_change_of_basis = PETSC_TRUE; 6287 pcbddcf->use_change_on_faces = PETSC_TRUE; 6288 pcbddcf->use_qr_single = PETSC_TRUE; 6289 pcbddcf->fake_change = PETSC_TRUE; 6290 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6291 /* store information on primal vertices and change of basis (in local numbering) */ 6292 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6293 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6294 change = pcbddcf->ConstraintMatrix; 6295 pcbddcf->ConstraintMatrix = NULL; 6296 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6297 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6298 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6299 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6300 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6301 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6302 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6303 pcf->ops->destroy = NULL; 6304 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6305 } 6306 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6307 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6308 ierr = MatDestroy(&change);CHKERRQ(ierr); 6309 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6310 } 6311 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6312 6313 /* free adjacency */ 6314 if (free_used_adj) { 6315 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6316 } 6317 PetscFunctionReturn(0); 6318 } 6319 6320 #undef __FUNCT__ 6321 #define __FUNCT__ "PCBDDCInitSubSchurs" 6322 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6323 { 6324 PC_IS *pcis=(PC_IS*)pc->data; 6325 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6326 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6327 PCBDDCGraph graph; 6328 PetscErrorCode ierr; 6329 6330 PetscFunctionBegin; 6331 /* attach interface graph for determining subsets */ 6332 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6333 IS verticesIS,verticescomm; 6334 PetscInt vsize,*idxs; 6335 6336 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6337 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6338 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6339 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6340 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6341 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6342 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6343 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6344 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6345 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6346 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6347 /* 6348 if (pcbddc->dbg_flag) { 6349 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6350 } 6351 */ 6352 } else { 6353 graph = pcbddc->mat_graph; 6354 } 6355 6356 /* sub_schurs init */ 6357 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6358 6359 /* free graph struct */ 6360 if (pcbddc->sub_schurs_rebuild) { 6361 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6362 } 6363 PetscFunctionReturn(0); 6364 } 6365 6366 #undef __FUNCT__ 6367 #define __FUNCT__ "PCBDDCCheckOperator" 6368 PetscErrorCode PCBDDCCheckOperator(PC pc) 6369 { 6370 PC_IS *pcis=(PC_IS*)pc->data; 6371 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6372 PetscErrorCode ierr; 6373 6374 PetscFunctionBegin; 6375 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6376 IS zerodiag = NULL; 6377 Mat S_j,B0_B=NULL; 6378 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6379 PetscScalar *p0_check,*array,*array2; 6380 PetscReal norm; 6381 PetscInt i; 6382 6383 /* B0 and B0_B */ 6384 if (zerodiag) { 6385 IS dummy; 6386 6387 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6388 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6389 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6390 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6391 } 6392 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6393 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6394 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6395 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6396 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6397 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6398 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6399 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6400 /* S_j */ 6401 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6402 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6403 6404 /* mimic vector in \widetilde{W}_\Gamma */ 6405 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6406 /* continuous in primal space */ 6407 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6408 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6409 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6410 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6411 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6412 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6413 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6414 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6417 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6418 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6419 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6420 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6421 6422 /* assemble rhs for coarse problem */ 6423 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6424 /* local with Schur */ 6425 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6426 if (zerodiag) { 6427 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6428 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6429 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6430 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6431 } 6432 /* sum on primal nodes the local contributions */ 6433 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6434 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6435 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6436 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6437 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6438 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6439 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6440 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6441 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6442 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6443 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6444 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6445 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6446 /* scale primal nodes (BDDC sums contibutions) */ 6447 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6448 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6449 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6450 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6451 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6452 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6453 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6454 /* global: \widetilde{B0}_B w_\Gamma */ 6455 if (zerodiag) { 6456 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6457 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6458 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6459 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6460 } 6461 /* BDDC */ 6462 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6463 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6464 6465 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6466 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6467 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6468 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6469 for (i=0;i<pcbddc->benign_n;i++) { 6470 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6471 } 6472 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6473 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6474 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6475 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6476 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6477 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6478 } 6479 PetscFunctionReturn(0); 6480 } 6481