1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 if (!A->rmap->N || !A->cmap->N) { 290 *ncc = 0; 291 *cc = NULL; 292 PetscFunctionReturn(0); 293 } 294 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 295 if (!isseqaij && filter) { 296 PetscBool isseqdense; 297 298 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 299 if (!isseqdense) { 300 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 301 } else { /* TODO: rectangular case and LDA */ 302 PetscScalar *array; 303 PetscReal chop=1.e-6; 304 305 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 306 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 307 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 308 for (i=0;i<n;i++) { 309 PetscInt j; 310 for (j=i+1;j<n;j++) { 311 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 312 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 313 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 314 } 315 } 316 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 317 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 318 } 319 } else { 320 B = A; 321 } 322 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 323 324 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 325 if (filter) { 326 PetscScalar *data; 327 PetscInt j,cum; 328 329 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 330 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 331 cum = 0; 332 for (i=0;i<n;i++) { 333 PetscInt t; 334 335 for (j=xadj[i];j<xadj[i+1];j++) { 336 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 337 continue; 338 } 339 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 340 } 341 t = xadj_filtered[i]; 342 xadj_filtered[i] = cum; 343 cum += t; 344 } 345 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 346 } else { 347 xadj_filtered = NULL; 348 adjncy_filtered = NULL; 349 } 350 351 /* compute local connected components using PCBDDCGraph */ 352 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 353 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 354 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 355 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 356 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 358 if (xadj_filtered) { 359 graph->xadj = xadj_filtered; 360 graph->adjncy = adjncy_filtered; 361 } else { 362 graph->xadj = xadj; 363 graph->adjncy = adjncy; 364 } 365 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 367 /* partial clean up */ 368 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 369 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 370 if (A != B) { 371 ierr = MatDestroy(&B);CHKERRQ(ierr); 372 } 373 374 /* get back data */ 375 if (ncc) *ncc = graph->ncc; 376 if (cc) { 377 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 378 for (i=0;i<graph->ncc;i++) { 379 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 380 } 381 *cc = cc_n; 382 } 383 /* clean up graph */ 384 graph->xadj = 0; 385 graph->adjncy = 0; 386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "PCBDDCBenignCheck" 392 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 393 { 394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 395 PC_IS* pcis = (PC_IS*)(pc->data); 396 IS dirIS = NULL; 397 PetscInt i; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 402 if (zerodiag) { 403 Mat A; 404 Vec vec3_N; 405 PetscScalar *vals; 406 const PetscInt *idxs; 407 PetscInt nz,*count; 408 409 /* p0 */ 410 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 411 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 412 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 413 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 414 for (i=0;i<nz;i++) vals[i] = 1.; 415 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* v_I */ 419 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 420 for (i=0;i<nz;i++) vals[i] = 0.; 421 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 423 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 424 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 425 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 427 if (dirIS) { 428 PetscInt n; 429 430 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 431 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 432 for (i=0;i<n;i++) vals[i] = 0.; 433 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 438 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 439 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 440 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 441 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 442 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 443 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 444 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 445 } 446 ierr = PetscFree(vals);CHKERRQ(ierr); 447 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 448 449 /* there should not be any pressure dofs lying on the interface */ 450 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 451 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 453 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 454 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 455 for (i=0;i<nz;i++) { 456 if (count[idxs[i]]) { 457 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 458 } 459 } 460 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 461 ierr = PetscFree(count);CHKERRQ(ierr); 462 } 463 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 464 465 /* check PCBDDCBenignGetOrSetP0 */ 466 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 470 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 471 for (i=0;i<pcbddc->benign_n;i++) { 472 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 473 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 474 } 475 } 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 481 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 482 { 483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 484 IS pressures,zerodiag,*zerodiag_subs; 485 PetscInt nz,n; 486 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 492 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 493 for (n=0;n<pcbddc->benign_n;n++) { 494 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 495 } 496 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 497 pcbddc->benign_n = 0; 498 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 499 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 500 Checks if all the pressure dofs in each subdomain have a zero diagonal 501 If not, a change of basis on pressures is not needed 502 since the local Schur complements are SPD 503 */ 504 has_null_pressures = PETSC_TRUE; 505 have_null = PETSC_TRUE; 506 if (pcbddc->n_ISForDofsLocal) { 507 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 508 509 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 510 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 511 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 514 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 515 if (!sorted) { 516 ierr = ISSort(pressures);CHKERRQ(ierr); 517 } 518 } else { 519 pressures = NULL; 520 } 521 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 522 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 523 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 524 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 525 if (!sorted) { 526 ierr = ISSort(zerodiag);CHKERRQ(ierr); 527 } 528 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 529 if (!nz) { 530 if (n) have_null = PETSC_FALSE; 531 has_null_pressures = PETSC_FALSE; 532 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 533 } 534 recompute_zerodiag = PETSC_FALSE; 535 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 536 zerodiag_subs = NULL; 537 pcbddc->benign_n = 0; 538 if (has_null_pressures) { 539 IS *subs; 540 PetscInt nsubs,i; 541 542 subs = pcbddc->local_subs; 543 nsubs = pcbddc->n_local_subs; 544 if (nsubs > 1) { 545 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 546 for (i=0;i<nsubs;i++) { 547 ISLocalToGlobalMapping l2g; 548 IS t_zerodiag_subs; 549 PetscInt nl; 550 551 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 552 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 553 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 554 if (nl) { 555 PetscBool valid = PETSC_TRUE; 556 557 if (pressures) { 558 IS t_pressure_subs; 559 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 560 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 561 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 562 } 563 if (valid) { 564 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 565 pcbddc->benign_n++; 566 } else { 567 recompute_zerodiag = PETSC_TRUE; 568 } 569 } 570 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 571 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 572 } 573 } else { /* there's just one subdomain (or zero if they have not been detected */ 574 PetscBool valid = PETSC_TRUE; 575 if (pressures) { 576 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 577 } 578 if (valid) { 579 pcbddc->benign_n = 1; 580 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 581 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 582 zerodiag_subs[0] = zerodiag; 583 } 584 } 585 } 586 587 if (!pcbddc->benign_n) { 588 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 589 recompute_zerodiag = PETSC_FALSE; 590 has_null_pressures = PETSC_FALSE; 591 have_null = PETSC_FALSE; 592 } 593 594 /* final check for null pressures */ 595 if (zerodiag && pressures) { 596 PetscInt nz,np; 597 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 598 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 599 if (nz != np) have_null = PETSC_FALSE; 600 } 601 602 if (recompute_zerodiag) { 603 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 604 if (pcbddc->benign_n == 1) { 605 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 606 zerodiag = zerodiag_subs[0]; 607 } else { 608 PetscInt i,nzn,*new_idxs; 609 610 nzn = 0; 611 for (i=0;i<pcbddc->benign_n;i++) { 612 PetscInt ns; 613 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 614 nzn += ns; 615 } 616 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 617 nzn = 0; 618 for (i=0;i<pcbddc->benign_n;i++) { 619 PetscInt ns,*idxs; 620 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 622 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 623 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 624 nzn += ns; 625 } 626 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 627 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 628 } 629 have_null = PETSC_FALSE; 630 } 631 632 if (has_null_pressures) { 633 IS zerodiagc; 634 const PetscInt *idxs,*idxsc; 635 PetscInt i,s,*nnz; 636 637 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 638 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 639 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 640 /* local change of basis for pressures */ 641 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 642 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 643 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 644 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 645 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 646 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 647 for (i=0;i<pcbddc->benign_n;i++) { 648 PetscInt nzs,j; 649 650 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 651 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 652 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 653 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 654 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 655 } 656 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 657 ierr = PetscFree(nnz);CHKERRQ(ierr); 658 /* set identity on velocities */ 659 for (i=0;i<n-nz;i++) { 660 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 661 } 662 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 663 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 664 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 665 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 666 /* set change on pressures */ 667 for (s=0;s<pcbddc->benign_n;s++) { 668 PetscScalar *array; 669 PetscInt nzs; 670 671 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 672 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 673 for (i=0;i<nzs-1;i++) { 674 PetscScalar vals[2]; 675 PetscInt cols[2]; 676 677 cols[0] = idxs[i]; 678 cols[1] = idxs[nzs-1]; 679 vals[0] = 1.; 680 vals[1] = 1.; 681 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 682 } 683 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 684 for (i=0;i<nzs-1;i++) array[i] = -1.; 685 array[nzs-1] = 1.; 686 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 687 /* store local idxs for p0 */ 688 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 689 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 ierr = PetscFree(array);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 /* project if needed */ 695 if (pcbddc->benign_change_explicit) { 696 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 700 } 701 /* store global idxs for p0 */ 702 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 703 } 704 pcbddc->benign_zerodiag_subs = zerodiag_subs; 705 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 706 707 /* determines if the coarse solver will be singular or not */ 708 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 709 *zerodiaglocal = zerodiag; 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 715 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 716 { 717 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (!pcbddc->benign_sf) { 722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 723 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 724 } 725 if (get) { /* use SF to get values */ 726 PetscScalar *array; 727 728 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 729 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 730 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 731 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 732 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 733 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 734 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 735 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 742 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 743 { 744 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 /* TODO: add error checking 749 - avoid nested pop (or push) calls. 750 - cannot push before pop. 751 - cannot call this if pcbddc->local_mat is NULL 752 */ 753 if (!pcbddc->benign_n) { 754 PetscFunctionReturn(0); 755 } 756 if (pop) { 757 if (pcbddc->benign_change_explicit) { 758 IS is_p0; 759 MatReuse reuse; 760 761 /* extract B_0 */ 762 reuse = MAT_INITIAL_MATRIX; 763 if (pcbddc->benign_B0) { 764 reuse = MAT_REUSE_MATRIX; 765 } 766 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 767 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 768 /* remove rows and cols from local problem */ 769 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 770 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 771 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 772 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 773 } else { 774 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 775 PetscScalar *vals; 776 PetscInt i,n,*idxs_ins; 777 778 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 779 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 780 if (!pcbddc->benign_B0) { 781 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 782 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 783 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 784 /* this matrix is very sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 785 Setting nnz=100 should be more than enough */ 786 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 787 } 788 789 for (i=0;i<pcbddc->benign_n;i++) { 790 PetscScalar *array; 791 PetscInt *idxs,j,nz,cum; 792 793 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 794 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 796 for (j=0;j<nz;j++) vals[j] = 1.; 797 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 798 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 799 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 800 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 801 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 802 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 803 cum = 0; 804 for (j=0;j<n;j++) { 805 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 806 vals[cum] = array[j]; 807 idxs_ins[cum] = j; 808 cum++; 809 } 810 } 811 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 812 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 813 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 814 } 815 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 816 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 818 } 819 } else { /* push */ 820 if (pcbddc->benign_change_explicit) { 821 PetscInt i; 822 823 for (i=0;i<pcbddc->benign_n;i++) { 824 PetscScalar *B0_vals; 825 PetscInt *B0_cols,B0_ncol; 826 827 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 828 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 829 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 830 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 831 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 832 } 833 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 835 } else { 836 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 837 } 838 } 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "PCBDDCAdaptiveSelection" 844 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 845 { 846 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 847 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 848 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 849 PetscBLASInt *B_iwork,*B_ifail; 850 PetscScalar *work,lwork; 851 PetscScalar *St,*S,*eigv; 852 PetscScalar *Sarray,*Starray; 853 PetscReal *eigs,thresh; 854 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 855 PetscBool allocated_S_St; 856 #if defined(PETSC_USE_COMPLEX) 857 PetscReal *rwork; 858 #endif 859 PetscErrorCode ierr; 860 861 PetscFunctionBegin; 862 if (!sub_schurs->schur_explicit) { 863 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 864 } 865 866 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 867 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 868 } 869 870 if (pcbddc->dbg_flag) { 871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 872 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 873 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 874 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 875 } 876 877 if (pcbddc->dbg_flag) { 878 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 879 } 880 881 /* max size of subsets */ 882 mss = 0; 883 for (i=0;i<sub_schurs->n_subs;i++) { 884 PetscInt subset_size; 885 886 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 887 mss = PetscMax(mss,subset_size); 888 } 889 890 /* min/max and threshold */ 891 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 892 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 893 nmax = PetscMax(nmin,nmax); 894 allocated_S_St = PETSC_FALSE; 895 if (nmin) { 896 allocated_S_St = PETSC_TRUE; 897 } 898 899 /* allocate lapack workspace */ 900 cum = cum2 = 0; 901 maxneigs = 0; 902 for (i=0;i<sub_schurs->n_subs;i++) { 903 PetscInt n,subset_size; 904 905 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 906 n = PetscMin(subset_size,nmax); 907 cum += subset_size; 908 cum2 += subset_size*n; 909 maxneigs = PetscMax(maxneigs,n); 910 } 911 if (mss) { 912 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 913 PetscBLASInt B_itype = 1; 914 PetscBLASInt B_N = mss; 915 PetscReal zero = 0.0; 916 PetscReal eps = 0.0; /* dlamch? */ 917 918 B_lwork = -1; 919 S = NULL; 920 St = NULL; 921 eigs = NULL; 922 eigv = NULL; 923 B_iwork = NULL; 924 B_ifail = NULL; 925 #if defined(PETSC_USE_COMPLEX) 926 rwork = NULL; 927 #endif 928 thresh = 1.0; 929 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 930 #if defined(PETSC_USE_COMPLEX) 931 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 932 #else 933 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 934 #endif 935 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 937 } else { 938 /* TODO */ 939 } 940 } else { 941 lwork = 0; 942 } 943 944 nv = 0; 945 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 946 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 947 } 948 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 949 if (allocated_S_St) { 950 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 951 } 952 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 953 #if defined(PETSC_USE_COMPLEX) 954 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 955 #endif 956 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 957 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 958 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 959 nv+cum,&pcbddc->adaptive_constraints_idxs, 960 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 961 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 962 963 maxneigs = 0; 964 cum = cum2 = cumarray = 0; 965 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 966 pcbddc->adaptive_constraints_data_ptr[0] = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 968 const PetscInt *idxs; 969 970 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 971 for (cum=0;cum<nv;cum++) { 972 pcbddc->adaptive_constraints_n[cum] = 1; 973 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 974 pcbddc->adaptive_constraints_data[cum] = 1.0; 975 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 976 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 977 } 978 cum2 = cum; 979 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 980 } 981 982 if (mss) { /* multilevel */ 983 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 984 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 985 } 986 987 thresh = pcbddc->adaptive_threshold; 988 for (i=0;i<sub_schurs->n_subs;i++) { 989 const PetscInt *idxs; 990 PetscReal infty = PETSC_MAX_REAL; 991 PetscInt j,subset_size,eigs_start = 0; 992 PetscBLASInt B_N; 993 PetscBool same_data = PETSC_FALSE; 994 995 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 996 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 997 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 998 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 999 if (sub_schurs->is_hermitian) { 1000 PetscInt j,k; 1001 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1002 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1003 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1004 } 1005 for (j=0;j<subset_size;j++) { 1006 for (k=j;k<subset_size;k++) { 1007 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1008 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1009 } 1010 } 1011 } else { 1012 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1013 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1014 } 1015 } else { 1016 S = Sarray + cumarray; 1017 St = Starray + cumarray; 1018 } 1019 1020 /* see if we can save some work */ 1021 if (sub_schurs->n_subs == 1) { 1022 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1023 } 1024 1025 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1026 B_neigs = 0; 1027 } else { 1028 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1029 PetscBLASInt B_itype = 1; 1030 PetscBLASInt B_IL, B_IU; 1031 PetscReal eps = -1.0; /* dlamch? */ 1032 PetscInt nmin_s; 1033 1034 if (pcbddc->dbg_flag) { 1035 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1036 } 1037 1038 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1039 if (thresh > 1.+PETSC_SMALL) { 1040 1041 /* ask for eigenvalues larger than thresh */ 1042 #if defined(PETSC_USE_COMPLEX) 1043 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1044 #else 1045 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1046 #endif 1047 } else { 1048 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1049 B_IL = 1; 1050 #if defined(PETSC_USE_COMPLEX) 1051 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1052 #else 1053 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1054 #endif 1055 } 1056 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1057 if (B_ierr) { 1058 if (B_ierr < 0 ) { 1059 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1060 } else if (B_ierr <= B_N) { 1061 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1062 } else { 1063 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1064 } 1065 } 1066 1067 if (B_neigs > nmax) { 1068 if (pcbddc->dbg_flag) { 1069 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1070 } 1071 eigs_start = B_neigs -nmax; 1072 B_neigs = nmax; 1073 } 1074 1075 nmin_s = PetscMin(nmin,B_N); 1076 if (B_neigs < nmin_s) { 1077 PetscBLASInt B_neigs2; 1078 1079 B_IU = B_N - B_neigs; 1080 B_IL = B_N - nmin_s + 1; 1081 if (pcbddc->dbg_flag) { 1082 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1083 } 1084 if (sub_schurs->is_hermitian) { 1085 PetscInt j; 1086 for (j=0;j<subset_size;j++) { 1087 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1088 } 1089 for (j=0;j<subset_size;j++) { 1090 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1091 } 1092 } else { 1093 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1095 } 1096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1097 #if defined(PETSC_USE_COMPLEX) 1098 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1099 #else 1100 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1101 #endif 1102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1103 B_neigs += B_neigs2; 1104 } 1105 if (B_ierr) { 1106 if (B_ierr < 0 ) { 1107 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1108 } else if (B_ierr <= B_N) { 1109 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1110 } else { 1111 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1112 } 1113 } 1114 if (pcbddc->dbg_flag) { 1115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1116 for (j=0;j<B_neigs;j++) { 1117 if (eigs[j] == 0.0) { 1118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1119 } else { 1120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1121 } 1122 } 1123 } 1124 } else { 1125 /* TODO */ 1126 } 1127 } 1128 maxneigs = PetscMax(B_neigs,maxneigs); 1129 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1130 if (B_neigs) { 1131 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1132 1133 if (pcbddc->dbg_flag > 1) { 1134 PetscInt ii; 1135 for (ii=0;ii<B_neigs;ii++) { 1136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1137 for (j=0;j<B_N;j++) { 1138 #if defined(PETSC_USE_COMPLEX) 1139 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1140 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1141 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1142 #else 1143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1144 #endif 1145 } 1146 } 1147 } 1148 #if 0 1149 for (j=0;j<B_neigs;j++) { 1150 PetscBLASInt Blas_N,Blas_one = 1.0; 1151 PetscScalar norm; 1152 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1153 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1154 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1155 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1156 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1157 } else { 1158 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1159 } 1160 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1161 } 1162 #endif 1163 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1164 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1165 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1166 cum++; 1167 } 1168 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1169 /* shift for next computation */ 1170 cumarray += subset_size*subset_size; 1171 } 1172 if (pcbddc->dbg_flag) { 1173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1174 } 1175 1176 if (mss) { 1177 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1178 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1179 /* destroy matrices (junk) */ 1180 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1181 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1182 } 1183 if (allocated_S_St) { 1184 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1185 } 1186 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1187 #if defined(PETSC_USE_COMPLEX) 1188 ierr = PetscFree(rwork);CHKERRQ(ierr); 1189 #endif 1190 if (pcbddc->dbg_flag) { 1191 PetscInt maxneigs_r; 1192 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1193 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCSetUpSolvers" 1200 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1201 { 1202 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1203 PetscScalar *coarse_submat_vals; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1208 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1209 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1210 1211 /* Setup local neumann solver ksp_R */ 1212 /* PCBDDCSetUpLocalScatters should be called first! */ 1213 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1214 1215 /* Change global null space passed in by the user if change of basis has been requested */ 1216 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1217 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1218 } 1219 1220 /* 1221 Setup local correction and local part of coarse basis. 1222 Gives back the dense local part of the coarse matrix in column major ordering 1223 */ 1224 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1225 1226 /* Compute total number of coarse nodes and setup coarse solver */ 1227 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1228 1229 /* free */ 1230 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCResetCustomization" 1236 PetscErrorCode PCBDDCResetCustomization(PC pc) 1237 { 1238 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1243 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1244 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1245 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1246 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1247 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1248 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1249 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1250 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1251 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1252 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1253 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "PCBDDCResetTopography" 1259 PetscErrorCode PCBDDCResetTopography(PC pc) 1260 { 1261 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1262 PetscInt i; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1267 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1268 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1269 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1270 for (i=0;i<pcbddc->n_local_subs;i++) { 1271 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1272 } 1273 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1274 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCResetSolvers" 1280 PetscErrorCode PCBDDCResetSolvers(PC pc) 1281 { 1282 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1287 if (pcbddc->coarse_phi_B) { 1288 PetscScalar *array; 1289 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1290 ierr = PetscFree(array);CHKERRQ(ierr); 1291 } 1292 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1293 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1294 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1295 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1296 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1297 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1298 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1299 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1300 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1301 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1302 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1303 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1304 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1305 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1306 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1307 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1308 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1309 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1310 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1311 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1312 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1313 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1314 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1316 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1318 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1319 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1320 if (pcbddc->benign_zerodiag_subs) { 1321 PetscInt i; 1322 for (i=0;i<pcbddc->benign_n;i++) { 1323 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1324 } 1325 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1326 } 1327 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1333 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1334 { 1335 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1336 PC_IS *pcis = (PC_IS*)pc->data; 1337 VecType impVecType; 1338 PetscInt n_constraints,n_R,old_size; 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (!pcbddc->ConstraintMatrix) { 1343 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1344 } 1345 /* get sizes */ 1346 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1347 n_R = pcis->n - pcbddc->n_vertices; 1348 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1349 /* local work vectors (try to avoid unneeded work)*/ 1350 /* R nodes */ 1351 old_size = -1; 1352 if (pcbddc->vec1_R) { 1353 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1354 } 1355 if (n_R != old_size) { 1356 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1357 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1358 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1359 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1360 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1361 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1362 } 1363 /* local primal dofs */ 1364 old_size = -1; 1365 if (pcbddc->vec1_P) { 1366 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1367 } 1368 if (pcbddc->local_primal_size != old_size) { 1369 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1370 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1371 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1372 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1373 } 1374 /* local explicit constraints */ 1375 old_size = -1; 1376 if (pcbddc->vec1_C) { 1377 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1378 } 1379 if (n_constraints && n_constraints != old_size) { 1380 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1381 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1382 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1383 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCSetUpCorrection" 1390 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1391 { 1392 PetscErrorCode ierr; 1393 /* pointers to pcis and pcbddc */ 1394 PC_IS* pcis = (PC_IS*)pc->data; 1395 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1396 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1397 /* submatrices of local problem */ 1398 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1399 /* submatrices of benign trick */ 1400 Mat B0_V = NULL; 1401 /* submatrices of local coarse problem */ 1402 Mat S_VV,S_CV,S_VC,S_CC; 1403 /* working matrices */ 1404 Mat C_CR; 1405 /* additional working stuff */ 1406 PC pc_R; 1407 Mat F; 1408 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1409 1410 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1411 PetscScalar *work; 1412 PetscInt *idx_V_B; 1413 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1414 PetscInt i,n_R,n_D,n_B; 1415 1416 /* some shortcuts to scalars */ 1417 PetscScalar one=1.0,m_one=-1.0; 1418 1419 PetscFunctionBegin; 1420 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1421 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1422 } 1423 1424 /* Set Non-overlapping dimensions */ 1425 n_vertices = pcbddc->n_vertices; 1426 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1427 n_B = pcis->n_B; 1428 n_D = pcis->n - n_B; 1429 n_R = pcis->n - n_vertices; 1430 1431 /* vertices in boundary numbering */ 1432 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1433 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1434 if (i != n_vertices) { 1435 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1436 } 1437 1438 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1439 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1440 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1441 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1442 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1443 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1444 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1445 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1446 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1447 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1448 1449 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1450 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1454 lda_rhs = n_R; 1455 need_benign_correction = PETSC_FALSE; 1456 if (isLU || isILU || isCHOL) { 1457 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1458 } else if (sub_schurs->reuse_solver) { 1459 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1460 MatFactorType type; 1461 1462 F = reuse_solver->F; 1463 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1464 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1465 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1466 need_benign_correction = !!reuse_solver->benign_n; 1467 } else { 1468 F = NULL; 1469 } 1470 1471 /* allocate workspace */ 1472 n = 0; 1473 if (n_constraints) { 1474 n += lda_rhs*n_constraints; 1475 } 1476 if (n_vertices) { 1477 n = PetscMax(2*lda_rhs*n_vertices,n); 1478 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1479 } 1480 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1481 1482 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1483 if (n_constraints) { 1484 Mat M1,M2,M3; 1485 Mat auxmat; 1486 IS is_aux; 1487 PetscScalar *array,*array2; 1488 1489 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1490 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1491 1492 /* Extract constraints on R nodes: C_{CR} */ 1493 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1494 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1495 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1496 1497 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1498 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1499 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1500 for (i=0;i<n_constraints;i++) { 1501 const PetscScalar *row_cmat_values; 1502 const PetscInt *row_cmat_indices; 1503 PetscInt size_of_constraint,j; 1504 1505 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1506 for (j=0;j<size_of_constraint;j++) { 1507 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1508 } 1509 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1510 } 1511 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1512 if (F) { 1513 Mat B; 1514 1515 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1516 if (need_benign_correction) { 1517 PetscScalar *marr; 1518 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1519 1520 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1521 for (i=0;i<n_constraints;i++) { 1522 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1523 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1524 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1525 } 1526 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1527 } 1528 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1529 if (need_benign_correction) { 1530 PetscScalar *marr; 1531 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1532 1533 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1534 for (i=0;i<n_constraints;i++) { 1535 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1536 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1537 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1538 } 1539 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1540 } 1541 ierr = MatDestroy(&B);CHKERRQ(ierr); 1542 } else { 1543 PetscScalar *marr; 1544 1545 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1546 for (i=0;i<n_constraints;i++) { 1547 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1548 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1549 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1550 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1551 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1552 } 1553 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1554 } 1555 if (!pcbddc->switch_static) { 1556 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1557 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1558 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1559 for (i=0;i<n_constraints;i++) { 1560 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1561 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1562 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1565 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1566 } 1567 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1568 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1569 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1570 } else { 1571 if (lda_rhs != n_R) { 1572 IS dummy; 1573 1574 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1575 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1576 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1577 } else { 1578 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1579 pcbddc->local_auxmat2 = local_auxmat2_R; 1580 } 1581 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1582 } 1583 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1584 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1585 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1586 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1587 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1588 if (isCHOL) { 1589 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1590 } else { 1591 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1592 } 1593 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1594 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1595 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1596 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1597 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1598 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1599 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1600 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1601 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1602 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1603 } 1604 /* Get submatrices from subdomain matrix */ 1605 if (pcbddc->benign_n) { 1606 IS dummy; 1607 Mat B0_R; 1608 PetscReal norm; 1609 1610 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1611 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1612 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1613 if (norm > PETSC_SMALL) { 1614 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1615 } 1616 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1617 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1618 } 1619 1620 if (n_vertices) { 1621 IS is_aux; 1622 1623 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1624 IS tis; 1625 1626 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1627 ierr = ISSort(tis);CHKERRQ(ierr); 1628 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1629 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1630 } else { 1631 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1632 } 1633 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1634 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1635 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1636 if (pcbddc->benign_n) { 1637 IS dummy; 1638 1639 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1640 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1641 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1642 } 1643 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1644 } 1645 1646 /* Matrix of coarse basis functions (local) */ 1647 if (pcbddc->coarse_phi_B) { 1648 PetscInt on_B,on_primal,on_D=n_D; 1649 if (pcbddc->coarse_phi_D) { 1650 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1651 } 1652 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1653 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1654 PetscScalar *marray; 1655 1656 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1657 ierr = PetscFree(marray);CHKERRQ(ierr); 1658 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1659 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1660 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1661 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1662 } 1663 } 1664 1665 if (!pcbddc->coarse_phi_B) { 1666 PetscScalar *marray; 1667 1668 n = n_B*pcbddc->local_primal_size; 1669 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1670 n += n_D*pcbddc->local_primal_size; 1671 } 1672 if (!pcbddc->symmetric_primal) { 1673 n *= 2; 1674 } 1675 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1676 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1677 n = n_B*pcbddc->local_primal_size; 1678 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1680 n += n_D*pcbddc->local_primal_size; 1681 } 1682 if (!pcbddc->symmetric_primal) { 1683 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1684 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1685 n = n_B*pcbddc->local_primal_size; 1686 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1687 } 1688 } else { 1689 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1690 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1693 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1694 } 1695 } 1696 } 1697 1698 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1699 p0_lidx_I = NULL; 1700 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1701 const PetscInt *idxs; 1702 1703 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1704 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1705 for (i=0;i<pcbddc->benign_n;i++) { 1706 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1707 } 1708 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1709 } 1710 1711 /* vertices */ 1712 if (n_vertices) { 1713 1714 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1715 1716 if (n_R) { 1717 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1718 PetscBLASInt B_N,B_one = 1; 1719 PetscScalar *x,*y; 1720 PetscBool isseqaij; 1721 1722 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1723 if (need_benign_correction) { 1724 ISLocalToGlobalMapping RtoN; 1725 IS is_p0; 1726 PetscInt *idxs_p0,n; 1727 1728 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1729 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1730 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1731 if (n != pcbddc->benign_n) { 1732 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1733 } 1734 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1735 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1736 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1737 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1738 } 1739 1740 if (lda_rhs == n_R) { 1741 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1742 } else { 1743 PetscScalar *av,*array; 1744 const PetscInt *xadj,*adjncy; 1745 PetscInt n; 1746 PetscBool flg_row; 1747 1748 array = work+lda_rhs*n_vertices; 1749 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1750 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1751 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1752 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1753 for (i=0;i<n;i++) { 1754 PetscInt j; 1755 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1756 } 1757 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1758 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1759 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1760 } 1761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1762 if (F) { 1763 if (need_benign_correction) { 1764 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1765 PetscScalar *marr; 1766 1767 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1768 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1769 1770 | 0 0 0 | (V) 1771 L = | 0 0 -1 | (P-p0) 1772 | 0 0 -1 | (p0) 1773 1774 */ 1775 for (i=0;i<reuse_solver->benign_n;i++) { 1776 const PetscScalar *vals; 1777 const PetscInt *idxs,*idxs_zero; 1778 PetscInt n,j,nz; 1779 1780 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1781 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1782 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1783 for (j=0;j<n;j++) { 1784 PetscScalar val = vals[j]; 1785 PetscInt k,col = idxs[j]; 1786 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1787 } 1788 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1789 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1790 } 1791 /* need to correct the rhs */ 1792 for (i=0;i<n_vertices;i++) { 1793 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1794 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1795 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1796 } 1797 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1798 } 1799 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1800 /* need to correct the solution */ 1801 if (need_benign_correction) { 1802 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1803 PetscScalar *marr; 1804 1805 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1806 for (i=0;i<n_vertices;i++) { 1807 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1808 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1809 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1810 } 1811 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1812 } 1813 } else { 1814 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1815 for (i=0;i<n_vertices;i++) { 1816 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1817 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1818 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1819 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1820 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1821 } 1822 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1823 } 1824 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1825 /* S_VV and S_CV */ 1826 if (n_constraints) { 1827 Mat B; 1828 1829 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1830 for (i=0;i<n_vertices;i++) { 1831 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1832 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1833 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1834 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1835 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1836 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1837 } 1838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1839 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1840 ierr = MatDestroy(&B);CHKERRQ(ierr); 1841 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1842 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1843 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1844 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1845 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1846 ierr = MatDestroy(&B);CHKERRQ(ierr); 1847 } 1848 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1849 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1850 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1851 } 1852 if (lda_rhs != n_R) { 1853 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1854 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1855 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1856 } 1857 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1858 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1859 if (need_benign_correction) { 1860 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1861 PetscScalar *marr,*sums; 1862 1863 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1864 ierr = MatDenseGetArray(S_VVt,&marr); 1865 for (i=0;i<reuse_solver->benign_n;i++) { 1866 const PetscScalar *vals; 1867 const PetscInt *idxs,*idxs_zero; 1868 PetscInt n,j,nz; 1869 1870 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1871 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1872 for (j=0;j<n_vertices;j++) { 1873 PetscInt k; 1874 sums[j] = 0.; 1875 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1876 } 1877 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1878 for (j=0;j<n;j++) { 1879 PetscScalar val = vals[j]; 1880 PetscInt k; 1881 for (k=0;k<n_vertices;k++) { 1882 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1883 } 1884 } 1885 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1886 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1887 } 1888 ierr = PetscFree(sums);CHKERRQ(ierr); 1889 ierr = MatDenseRestoreArray(S_VVt,&marr); 1890 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1891 } 1892 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1894 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1895 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1896 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1897 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1898 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1899 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1900 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1901 } else { 1902 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1903 } 1904 for (i=0;i<pcbddc->benign_n;i++) { 1905 const PetscScalar *vals; 1906 const PetscInt *idxs; 1907 PetscInt n,j,primal_idx; 1908 1909 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1910 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1911 for (j=0;j<n;j++) { 1912 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1913 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1914 } 1915 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1916 } 1917 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1918 1919 /* coarse basis functions */ 1920 for (i=0;i<n_vertices;i++) { 1921 PetscScalar *y; 1922 1923 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1924 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1925 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1926 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1927 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1928 y[n_B*i+idx_V_B[i]] = 1.0; 1929 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1930 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1931 1932 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1933 PetscInt j; 1934 1935 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1936 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1937 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1938 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1939 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1940 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1941 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1942 } 1943 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1944 } 1945 /* if n_R == 0 the object is not destroyed */ 1946 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1947 } 1948 1949 if (n_constraints) { 1950 Mat B; 1951 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1953 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1954 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1955 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1956 if (n_vertices) { 1957 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1958 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1959 } else { 1960 Mat S_VCt; 1961 1962 if (lda_rhs != n_R) { 1963 ierr = MatDestroy(&B);CHKERRQ(ierr); 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1965 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1966 } 1967 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1968 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1969 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1970 } 1971 } 1972 ierr = MatDestroy(&B);CHKERRQ(ierr); 1973 /* coarse basis functions */ 1974 for (i=0;i<n_constraints;i++) { 1975 PetscScalar *y; 1976 1977 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1978 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1979 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1980 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1981 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1982 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1983 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1984 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1985 PetscInt j; 1986 1987 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1988 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1989 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1990 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1991 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1992 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1993 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1994 } 1995 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1996 } 1997 } 1998 if (n_constraints) { 1999 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2000 } 2001 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2002 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2003 2004 /* compute other basis functions for non-symmetric problems */ 2005 if (!pcbddc->symmetric_primal) { 2006 Mat B_V=NULL,B_C=NULL; 2007 PetscScalar *marray; 2008 2009 if (n_constraints) { 2010 Mat S_CCT,C_CRT; 2011 2012 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2013 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2014 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2015 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2016 if (n_vertices) { 2017 Mat S_VCT; 2018 2019 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2020 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2021 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2022 } 2023 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2024 } 2025 if (n_vertices && n_R) { 2026 PetscScalar *av,*marray; 2027 const PetscInt *xadj,*adjncy; 2028 PetscInt n; 2029 PetscBool flg_row; 2030 2031 /* B_V = B_V - A_VR^T */ 2032 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2033 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2034 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2035 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2036 for (i=0;i<n;i++) { 2037 PetscInt j; 2038 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2039 } 2040 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2041 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2042 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2043 } 2044 2045 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2046 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2047 for (i=0;i<n_vertices;i++) { 2048 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2049 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2050 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2051 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2052 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2053 } 2054 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2055 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2056 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2057 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2058 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2059 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2061 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2062 } 2063 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2064 /* coarse basis functions */ 2065 for (i=0;i<pcbddc->local_primal_size;i++) { 2066 PetscScalar *y; 2067 2068 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2069 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2070 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2071 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2073 if (i<n_vertices) { 2074 y[n_B*i+idx_V_B[i]] = 1.0; 2075 } 2076 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2077 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2078 2079 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2080 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2085 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2086 } 2087 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2088 } 2089 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2090 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2091 } 2092 /* free memory */ 2093 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2094 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2095 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2096 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2097 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2098 ierr = PetscFree(work);CHKERRQ(ierr); 2099 if (n_vertices) { 2100 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2101 } 2102 if (n_constraints) { 2103 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2104 } 2105 /* Checking coarse_sub_mat and coarse basis functios */ 2106 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2107 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2108 if (pcbddc->dbg_flag) { 2109 Mat coarse_sub_mat; 2110 Mat AUXMAT,TM1,TM2,TM3,TM4; 2111 Mat coarse_phi_D,coarse_phi_B; 2112 Mat coarse_psi_D,coarse_psi_B; 2113 Mat A_II,A_BB,A_IB,A_BI; 2114 Mat C_B,CPHI; 2115 IS is_dummy; 2116 Vec mones; 2117 MatType checkmattype=MATSEQAIJ; 2118 PetscReal real_value; 2119 2120 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2121 Mat A; 2122 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2123 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2124 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2125 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2126 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2127 ierr = MatDestroy(&A);CHKERRQ(ierr); 2128 } else { 2129 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2130 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2131 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2132 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2133 } 2134 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2135 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2136 if (!pcbddc->symmetric_primal) { 2137 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2138 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2139 } 2140 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2141 2142 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2143 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2144 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2145 if (!pcbddc->symmetric_primal) { 2146 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2147 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2148 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2149 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2150 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2151 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2152 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2153 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2154 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2155 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2156 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2157 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2158 } else { 2159 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2160 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2161 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2162 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2163 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2164 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2165 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2166 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2167 } 2168 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2169 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2170 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2171 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2172 if (pcbddc->benign_n) { 2173 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2174 PetscScalar *data,*data2; 2175 PetscInt j; 2176 2177 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2178 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2179 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2180 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2181 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2182 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2183 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2184 for (j=0;j<pcbddc->benign_n;j++) { 2185 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2186 for (i=0;i<pcbddc->local_primal_size;i++) { 2187 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2188 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2189 } 2190 } 2191 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2192 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2193 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2194 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2195 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2196 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2197 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2198 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2199 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2200 } 2201 #if 0 2202 { 2203 PetscViewer viewer; 2204 char filename[256]; 2205 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2206 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2207 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2208 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2209 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2210 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2211 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2212 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2213 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2214 if (pcbddc->coarse_phi_D) { 2215 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2216 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2217 } 2218 if (pcbddc->coarse_psi_B) { 2219 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2220 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2221 } 2222 if (pcbddc->coarse_phi_B) { 2223 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2224 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2225 } 2226 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2227 } 2228 #endif 2229 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2230 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2231 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2233 2234 /* check constraints */ 2235 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2236 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2237 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2238 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2239 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2240 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2241 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2242 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2244 if (!pcbddc->symmetric_primal) { 2245 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2246 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2247 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2248 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2249 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2250 } 2251 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2252 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2253 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2254 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2255 } 2256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2257 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2258 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2259 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2260 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2261 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2262 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2263 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2264 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2265 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2266 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2267 if (!pcbddc->symmetric_primal) { 2268 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2269 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2270 } 2271 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2272 } 2273 /* get back data */ 2274 *coarse_submat_vals_n = coarse_submat_vals; 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2280 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2281 { 2282 Mat *work_mat; 2283 IS isrow_s,iscol_s; 2284 PetscBool rsorted,csorted; 2285 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2290 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2291 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2292 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2293 2294 if (!rsorted) { 2295 const PetscInt *idxs; 2296 PetscInt *idxs_sorted,i; 2297 2298 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2299 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2300 for (i=0;i<rsize;i++) { 2301 idxs_perm_r[i] = i; 2302 } 2303 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2304 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2305 for (i=0;i<rsize;i++) { 2306 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2307 } 2308 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2309 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2310 } else { 2311 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2312 isrow_s = isrow; 2313 } 2314 2315 if (!csorted) { 2316 if (isrow == iscol) { 2317 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2318 iscol_s = isrow_s; 2319 } else { 2320 const PetscInt *idxs; 2321 PetscInt *idxs_sorted,i; 2322 2323 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2324 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2325 for (i=0;i<csize;i++) { 2326 idxs_perm_c[i] = i; 2327 } 2328 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2329 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2330 for (i=0;i<csize;i++) { 2331 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2332 } 2333 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2334 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2335 } 2336 } else { 2337 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2338 iscol_s = iscol; 2339 } 2340 2341 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2342 2343 if (!rsorted || !csorted) { 2344 Mat new_mat; 2345 IS is_perm_r,is_perm_c; 2346 2347 if (!rsorted) { 2348 PetscInt *idxs_r,i; 2349 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2350 for (i=0;i<rsize;i++) { 2351 idxs_r[idxs_perm_r[i]] = i; 2352 } 2353 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2354 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2355 } else { 2356 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2357 } 2358 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2359 2360 if (!csorted) { 2361 if (isrow_s == iscol_s) { 2362 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2363 is_perm_c = is_perm_r; 2364 } else { 2365 PetscInt *idxs_c,i; 2366 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2367 for (i=0;i<csize;i++) { 2368 idxs_c[idxs_perm_c[i]] = i; 2369 } 2370 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2371 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2372 } 2373 } else { 2374 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2375 } 2376 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2377 2378 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2379 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2380 work_mat[0] = new_mat; 2381 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2382 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2383 } 2384 2385 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2386 *B = work_mat[0]; 2387 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2388 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2389 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2395 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2396 { 2397 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2398 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2399 Mat new_mat; 2400 IS is_local,is_global; 2401 PetscInt local_size; 2402 PetscBool isseqaij; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2407 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2408 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2409 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2410 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2411 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2412 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2413 2414 /* check */ 2415 if (pcbddc->dbg_flag) { 2416 Vec x,x_change; 2417 PetscReal error; 2418 2419 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2420 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2421 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2422 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2423 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2424 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2425 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2426 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2427 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2428 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2431 ierr = VecDestroy(&x);CHKERRQ(ierr); 2432 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2433 } 2434 2435 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2436 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2437 if (isseqaij) { 2438 Mat M; 2439 2440 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2441 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2442 ierr = MatDestroy(&M);CHKERRQ(ierr); 2443 } else { 2444 Mat work_mat,M; 2445 2446 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2447 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2448 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2449 ierr = MatDestroy(&M);CHKERRQ(ierr); 2450 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2451 } 2452 if (matis->A->symmetric_set) { 2453 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2454 #if !defined(PETSC_USE_COMPLEX) 2455 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2456 #endif 2457 } 2458 /* 2459 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2460 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2461 */ 2462 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2468 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2469 { 2470 PC_IS* pcis = (PC_IS*)(pc->data); 2471 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2472 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2473 PetscInt *idx_R_local=NULL; 2474 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2475 PetscInt vbs,bs; 2476 PetscBT bitmask=NULL; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 /* 2481 No need to setup local scatters if 2482 - primal space is unchanged 2483 AND 2484 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2485 AND 2486 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2487 */ 2488 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2489 PetscFunctionReturn(0); 2490 } 2491 /* destroy old objects */ 2492 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2493 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2494 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2495 /* Set Non-overlapping dimensions */ 2496 n_B = pcis->n_B; 2497 n_D = pcis->n - n_B; 2498 n_vertices = pcbddc->n_vertices; 2499 2500 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2501 2502 /* create auxiliary bitmask and allocate workspace */ 2503 if (!sub_schurs->reuse_solver) { 2504 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2505 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2506 for (i=0;i<n_vertices;i++) { 2507 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2508 } 2509 2510 for (i=0, n_R=0; i<pcis->n; i++) { 2511 if (!PetscBTLookup(bitmask,i)) { 2512 idx_R_local[n_R++] = i; 2513 } 2514 } 2515 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2516 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2517 2518 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2519 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2520 } 2521 2522 /* Block code */ 2523 vbs = 1; 2524 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2525 if (bs>1 && !(n_vertices%bs)) { 2526 PetscBool is_blocked = PETSC_TRUE; 2527 PetscInt *vary; 2528 if (!sub_schurs->reuse_solver) { 2529 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2530 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2531 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2532 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2533 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2534 for (i=0; i<pcis->n/bs; i++) { 2535 if (vary[i]!=0 && vary[i]!=bs) { 2536 is_blocked = PETSC_FALSE; 2537 break; 2538 } 2539 } 2540 ierr = PetscFree(vary);CHKERRQ(ierr); 2541 } else { 2542 /* Verify directly the R set */ 2543 for (i=0; i<n_R/bs; i++) { 2544 PetscInt j,node=idx_R_local[bs*i]; 2545 for (j=1; j<bs; j++) { 2546 if (node != idx_R_local[bs*i+j]-j) { 2547 is_blocked = PETSC_FALSE; 2548 break; 2549 } 2550 } 2551 } 2552 } 2553 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2554 vbs = bs; 2555 for (i=0;i<n_R/vbs;i++) { 2556 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2557 } 2558 } 2559 } 2560 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2561 if (sub_schurs->reuse_solver) { 2562 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2563 2564 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2565 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2566 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2567 reuse_solver->is_R = pcbddc->is_R_local; 2568 } else { 2569 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2570 } 2571 2572 /* print some info if requested */ 2573 if (pcbddc->dbg_flag) { 2574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2576 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2581 } 2582 2583 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2584 if (!sub_schurs->reuse_solver) { 2585 IS is_aux1,is_aux2; 2586 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2587 2588 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2589 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2590 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2591 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2592 for (i=0; i<n_D; i++) { 2593 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2594 } 2595 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2596 for (i=0, j=0; i<n_R; i++) { 2597 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2598 aux_array1[j++] = i; 2599 } 2600 } 2601 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2602 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2603 for (i=0, j=0; i<n_B; i++) { 2604 if (!PetscBTLookup(bitmask,is_indices[i])) { 2605 aux_array2[j++] = i; 2606 } 2607 } 2608 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2609 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2610 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2611 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2612 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2613 2614 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2615 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2616 for (i=0, j=0; i<n_R; i++) { 2617 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2618 aux_array1[j++] = i; 2619 } 2620 } 2621 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2622 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2623 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2624 } 2625 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2627 } else { 2628 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2629 IS tis; 2630 PetscInt schur_size; 2631 2632 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2633 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2634 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2635 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2636 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2637 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2638 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2639 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2640 } 2641 } 2642 PetscFunctionReturn(0); 2643 } 2644 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2648 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2649 { 2650 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2651 PC_IS *pcis = (PC_IS*)pc->data; 2652 PC pc_temp; 2653 Mat A_RR; 2654 MatReuse reuse; 2655 PetscScalar m_one = -1.0; 2656 PetscReal value; 2657 PetscInt n_D,n_R; 2658 PetscBool use_exact,use_exact_reduced,issbaij; 2659 PetscErrorCode ierr; 2660 /* prefixes stuff */ 2661 char dir_prefix[256],neu_prefix[256],str_level[16]; 2662 size_t len; 2663 2664 PetscFunctionBegin; 2665 2666 /* compute prefixes */ 2667 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2668 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2669 if (!pcbddc->current_level) { 2670 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2671 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2672 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2673 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2674 } else { 2675 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2676 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2677 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2678 len -= 15; /* remove "pc_bddc_coarse_" */ 2679 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2680 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2681 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2682 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2683 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2684 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2685 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2686 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2687 } 2688 2689 /* DIRICHLET PROBLEM */ 2690 if (dirichlet) { 2691 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2692 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2693 Mat A_IIn; 2694 2695 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2696 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2697 pcis->A_II = A_IIn; 2698 } 2699 if (pcbddc->local_mat->symmetric_set) { 2700 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2701 } 2702 /* Matrix for Dirichlet problem is pcis->A_II */ 2703 n_D = pcis->n - pcis->n_B; 2704 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2705 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2706 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2707 /* default */ 2708 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2709 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2710 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2711 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2712 if (issbaij) { 2713 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2714 } else { 2715 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2716 } 2717 /* Allow user's customization */ 2718 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2719 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2720 } 2721 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2722 if (sub_schurs->reuse_solver) { 2723 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2724 2725 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2726 } 2727 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2728 if (!n_D) { 2729 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2730 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2731 } 2732 /* Set Up KSP for Dirichlet problem of BDDC */ 2733 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2734 /* set ksp_D into pcis data */ 2735 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2736 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2737 pcis->ksp_D = pcbddc->ksp_D; 2738 } 2739 2740 /* NEUMANN PROBLEM */ 2741 A_RR = 0; 2742 if (neumann) { 2743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2744 PetscInt ibs,mbs; 2745 PetscBool issbaij; 2746 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2747 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2748 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2749 if (pcbddc->ksp_R) { /* already created ksp */ 2750 PetscInt nn_R; 2751 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2752 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2753 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2754 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2755 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2756 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2757 reuse = MAT_INITIAL_MATRIX; 2758 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2759 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2760 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2761 reuse = MAT_INITIAL_MATRIX; 2762 } else { /* safe to reuse the matrix */ 2763 reuse = MAT_REUSE_MATRIX; 2764 } 2765 } 2766 /* last check */ 2767 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2768 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2769 reuse = MAT_INITIAL_MATRIX; 2770 } 2771 } else { /* first time, so we need to create the matrix */ 2772 reuse = MAT_INITIAL_MATRIX; 2773 } 2774 /* extract A_RR */ 2775 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2776 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2777 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2778 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2779 if (matis->A == pcbddc->local_mat) { 2780 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2781 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2782 } else { 2783 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2784 } 2785 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2786 if (matis->A == pcbddc->local_mat) { 2787 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2788 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2789 } else { 2790 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2791 } 2792 } 2793 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2794 if (pcbddc->local_mat->symmetric_set) { 2795 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2796 } 2797 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2798 Mat A_RRn; 2799 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2800 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2801 A_RR = A_RRn; 2802 } 2803 if (!pcbddc->ksp_R) { /* create object if not present */ 2804 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2805 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2806 /* default */ 2807 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2808 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2809 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2810 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2811 if (issbaij) { 2812 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2813 } else { 2814 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2815 } 2816 /* Allow user's customization */ 2817 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2818 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2819 } 2820 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2821 if (!n_R) { 2822 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2823 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2824 } 2825 /* Reuse solver if it is present */ 2826 if (sub_schurs->reuse_solver) { 2827 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2828 2829 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2830 } 2831 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2832 /* Set Up KSP for Neumann problem of BDDC */ 2833 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2834 } 2835 /* free Neumann problem's matrix */ 2836 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2837 2838 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2839 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2840 if (pcbddc->dbg_flag) { 2841 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2842 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2843 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2844 } 2845 if (dirichlet) { /* Dirichlet */ 2846 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2847 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2848 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2849 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2850 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2851 /* need to be adapted? */ 2852 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2853 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2854 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2855 /* print info */ 2856 if (pcbddc->dbg_flag) { 2857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2858 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2859 } 2860 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2861 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2862 } 2863 } 2864 if (neumann) { /* Neumann */ 2865 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2866 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2867 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2868 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2869 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2870 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2871 /* need to be adapted? */ 2872 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2873 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 /* print info */ 2875 if (pcbddc->dbg_flag) { 2876 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2877 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2878 } 2879 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2880 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2881 } 2882 } 2883 } 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2889 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2890 { 2891 PetscErrorCode ierr; 2892 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2893 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2894 2895 PetscFunctionBegin; 2896 if (!sub_schurs->reuse_solver) { 2897 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2898 } 2899 if (!pcbddc->switch_static) { 2900 if (applytranspose && pcbddc->local_auxmat1) { 2901 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2902 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2903 } 2904 if (!sub_schurs->reuse_solver) { 2905 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2906 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2907 } else { 2908 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2909 2910 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2911 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2912 } 2913 } else { 2914 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2915 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2916 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2918 if (applytranspose && pcbddc->local_auxmat1) { 2919 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2920 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2921 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2922 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2923 } 2924 } 2925 if (!sub_schurs->reuse_solver) { 2926 if (applytranspose) { 2927 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2928 } else { 2929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2930 } 2931 } else { 2932 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2933 2934 if (applytranspose) { 2935 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2936 } else { 2937 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2938 } 2939 } 2940 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2941 if (!pcbddc->switch_static) { 2942 if (!sub_schurs->reuse_solver) { 2943 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2945 } else { 2946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2947 2948 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950 } 2951 if (!applytranspose && pcbddc->local_auxmat1) { 2952 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2953 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2954 } 2955 } else { 2956 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2957 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2958 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2959 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2960 if (!applytranspose && pcbddc->local_auxmat1) { 2961 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2962 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2963 } 2964 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2965 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2966 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2967 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2968 } 2969 PetscFunctionReturn(0); 2970 } 2971 2972 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2975 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2976 { 2977 PetscErrorCode ierr; 2978 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2979 PC_IS* pcis = (PC_IS*) (pc->data); 2980 const PetscScalar zero = 0.0; 2981 2982 PetscFunctionBegin; 2983 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2984 if (applytranspose) { 2985 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2986 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2987 } else { 2988 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2989 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2990 } 2991 2992 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2993 if (pcbddc->benign_n) { 2994 PetscScalar *array; 2995 PetscInt j; 2996 2997 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2998 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2999 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3000 } 3001 3002 /* start communications from local primal nodes to rhs of coarse solver */ 3003 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3004 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 3007 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3008 /* TODO remove null space when doing multilevel */ 3009 if (pcbddc->coarse_ksp) { 3010 Mat coarse_mat; 3011 Vec rhs,sol; 3012 MatNullSpace nullsp; 3013 3014 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3015 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3016 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3017 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3018 if (nullsp) { 3019 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3020 } 3021 if (applytranspose) { 3022 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3023 } else { 3024 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3025 } 3026 if (nullsp) { 3027 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3028 } 3029 } 3030 3031 /* Local solution on R nodes */ 3032 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 3033 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3034 } 3035 3036 /* communications from coarse sol to local primal nodes */ 3037 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3038 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3039 3040 /* Sum contributions from two levels */ 3041 if (applytranspose) { 3042 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3043 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3044 } else { 3045 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3046 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3047 } 3048 /* store p0 */ 3049 if (pcbddc->benign_n) { 3050 PetscScalar *array; 3051 PetscInt j; 3052 3053 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3054 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3055 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3056 } 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3062 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3063 { 3064 PetscErrorCode ierr; 3065 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3066 PetscScalar *array; 3067 Vec from,to; 3068 3069 PetscFunctionBegin; 3070 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3071 from = pcbddc->coarse_vec; 3072 to = pcbddc->vec1_P; 3073 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3074 Vec tvec; 3075 3076 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3077 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3078 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3079 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3080 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3081 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3082 } 3083 } else { /* from local to global -> put data in coarse right hand side */ 3084 from = pcbddc->vec1_P; 3085 to = pcbddc->coarse_vec; 3086 } 3087 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3088 PetscFunctionReturn(0); 3089 } 3090 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3093 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3094 { 3095 PetscErrorCode ierr; 3096 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3097 PetscScalar *array; 3098 Vec from,to; 3099 3100 PetscFunctionBegin; 3101 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3102 from = pcbddc->coarse_vec; 3103 to = pcbddc->vec1_P; 3104 } else { /* from local to global -> put data in coarse right hand side */ 3105 from = pcbddc->vec1_P; 3106 to = pcbddc->coarse_vec; 3107 } 3108 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3109 if (smode == SCATTER_FORWARD) { 3110 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3111 Vec tvec; 3112 3113 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3114 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3115 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3116 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3117 } 3118 } else { 3119 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3120 ierr = VecResetArray(from);CHKERRQ(ierr); 3121 } 3122 } 3123 PetscFunctionReturn(0); 3124 } 3125 3126 /* uncomment for testing purposes */ 3127 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3130 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3131 { 3132 PetscErrorCode ierr; 3133 PC_IS* pcis = (PC_IS*)(pc->data); 3134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3135 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3136 /* one and zero */ 3137 PetscScalar one=1.0,zero=0.0; 3138 /* space to store constraints and their local indices */ 3139 PetscScalar *constraints_data; 3140 PetscInt *constraints_idxs,*constraints_idxs_B; 3141 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3142 PetscInt *constraints_n; 3143 /* iterators */ 3144 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3145 /* BLAS integers */ 3146 PetscBLASInt lwork,lierr; 3147 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3148 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3149 /* reuse */ 3150 PetscInt olocal_primal_size,olocal_primal_size_cc; 3151 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3152 /* change of basis */ 3153 PetscBool qr_needed; 3154 PetscBT change_basis,qr_needed_idx; 3155 /* auxiliary stuff */ 3156 PetscInt *nnz,*is_indices; 3157 PetscInt ncc; 3158 /* some quantities */ 3159 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3160 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3161 3162 PetscFunctionBegin; 3163 /* Destroy Mat objects computed previously */ 3164 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3165 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3166 /* save info on constraints from previous setup (if any) */ 3167 olocal_primal_size = pcbddc->local_primal_size; 3168 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3169 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3170 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3171 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3172 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3173 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3174 3175 /* print some info */ 3176 if (pcbddc->dbg_flag) { 3177 IS vertices; 3178 PetscInt nv,nedges,nfaces; 3179 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3180 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3181 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3184 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3188 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3189 } 3190 3191 if (!pcbddc->adaptive_selection) { 3192 IS ISForVertices,*ISForFaces,*ISForEdges; 3193 MatNullSpace nearnullsp; 3194 const Vec *nearnullvecs; 3195 Vec *localnearnullsp; 3196 PetscScalar *array; 3197 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3198 PetscBool nnsp_has_cnst; 3199 /* LAPACK working arrays for SVD or POD */ 3200 PetscBool skip_lapack,boolforchange; 3201 PetscScalar *work; 3202 PetscReal *singular_vals; 3203 #if defined(PETSC_USE_COMPLEX) 3204 PetscReal *rwork; 3205 #endif 3206 #if defined(PETSC_MISSING_LAPACK_GESVD) 3207 PetscScalar *temp_basis,*correlation_mat; 3208 #else 3209 PetscBLASInt dummy_int=1; 3210 PetscScalar dummy_scalar=1.; 3211 #endif 3212 3213 /* Get index sets for faces, edges and vertices from graph */ 3214 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3215 /* free unneeded index sets */ 3216 if (!pcbddc->use_vertices) { 3217 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3218 } 3219 if (!pcbddc->use_edges) { 3220 for (i=0;i<n_ISForEdges;i++) { 3221 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3222 } 3223 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3224 n_ISForEdges = 0; 3225 } 3226 if (!pcbddc->use_faces) { 3227 for (i=0;i<n_ISForFaces;i++) { 3228 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3229 } 3230 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3231 n_ISForFaces = 0; 3232 } 3233 3234 #if defined(PETSC_USE_DEBUG) 3235 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3236 Also use_change_of_basis should be consistent among processors */ 3237 if (pcbddc->NullSpace) { 3238 PetscBool tbool[2],gbool[2]; 3239 3240 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3241 pcbddc->use_change_of_basis = PETSC_TRUE; 3242 if (!ISForEdges) { 3243 pcbddc->use_change_on_faces = PETSC_TRUE; 3244 } 3245 } 3246 tbool[0] = pcbddc->use_change_of_basis; 3247 tbool[1] = pcbddc->use_change_on_faces; 3248 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3249 pcbddc->use_change_of_basis = gbool[0]; 3250 pcbddc->use_change_on_faces = gbool[1]; 3251 } 3252 #endif 3253 3254 /* check if near null space is attached to global mat */ 3255 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3256 if (nearnullsp) { 3257 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3258 /* remove any stored info */ 3259 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3260 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3261 /* store information for BDDC solver reuse */ 3262 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3263 pcbddc->onearnullspace = nearnullsp; 3264 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3265 for (i=0;i<nnsp_size;i++) { 3266 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3267 } 3268 } else { /* if near null space is not provided BDDC uses constants by default */ 3269 nnsp_size = 0; 3270 nnsp_has_cnst = PETSC_TRUE; 3271 } 3272 /* get max number of constraints on a single cc */ 3273 max_constraints = nnsp_size; 3274 if (nnsp_has_cnst) max_constraints++; 3275 3276 /* 3277 Evaluate maximum storage size needed by the procedure 3278 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3279 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3280 There can be multiple constraints per connected component 3281 */ 3282 n_vertices = 0; 3283 if (ISForVertices) { 3284 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3285 } 3286 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3287 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3288 3289 total_counts = n_ISForFaces+n_ISForEdges; 3290 total_counts *= max_constraints; 3291 total_counts += n_vertices; 3292 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3293 3294 total_counts = 0; 3295 max_size_of_constraint = 0; 3296 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3297 IS used_is; 3298 if (i<n_ISForEdges) { 3299 used_is = ISForEdges[i]; 3300 } else { 3301 used_is = ISForFaces[i-n_ISForEdges]; 3302 } 3303 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3304 total_counts += j; 3305 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3306 } 3307 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3308 3309 /* get local part of global near null space vectors */ 3310 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3311 for (k=0;k<nnsp_size;k++) { 3312 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3313 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3314 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3315 } 3316 3317 /* whether or not to skip lapack calls */ 3318 skip_lapack = PETSC_TRUE; 3319 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3320 3321 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3322 if (!skip_lapack) { 3323 PetscScalar temp_work; 3324 3325 #if defined(PETSC_MISSING_LAPACK_GESVD) 3326 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3327 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3328 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3329 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3330 #if defined(PETSC_USE_COMPLEX) 3331 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3332 #endif 3333 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3334 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3335 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3336 lwork = -1; 3337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3338 #if !defined(PETSC_USE_COMPLEX) 3339 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3340 #else 3341 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3342 #endif 3343 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3344 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3345 #else /* on missing GESVD */ 3346 /* SVD */ 3347 PetscInt max_n,min_n; 3348 max_n = max_size_of_constraint; 3349 min_n = max_constraints; 3350 if (max_size_of_constraint < max_constraints) { 3351 min_n = max_size_of_constraint; 3352 max_n = max_constraints; 3353 } 3354 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3355 #if defined(PETSC_USE_COMPLEX) 3356 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3357 #endif 3358 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3359 lwork = -1; 3360 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3361 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3362 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3363 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3364 #if !defined(PETSC_USE_COMPLEX) 3365 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3366 #else 3367 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3368 #endif 3369 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3370 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3371 #endif /* on missing GESVD */ 3372 /* Allocate optimal workspace */ 3373 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3374 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3375 } 3376 /* Now we can loop on constraining sets */ 3377 total_counts = 0; 3378 constraints_idxs_ptr[0] = 0; 3379 constraints_data_ptr[0] = 0; 3380 /* vertices */ 3381 if (n_vertices) { 3382 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3383 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3384 for (i=0;i<n_vertices;i++) { 3385 constraints_n[total_counts] = 1; 3386 constraints_data[total_counts] = 1.0; 3387 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3388 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3389 total_counts++; 3390 } 3391 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3392 n_vertices = total_counts; 3393 } 3394 3395 /* edges and faces */ 3396 total_counts_cc = total_counts; 3397 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3398 IS used_is; 3399 PetscBool idxs_copied = PETSC_FALSE; 3400 3401 if (ncc<n_ISForEdges) { 3402 used_is = ISForEdges[ncc]; 3403 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3404 } else { 3405 used_is = ISForFaces[ncc-n_ISForEdges]; 3406 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3407 } 3408 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3409 3410 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3411 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3412 /* change of basis should not be performed on local periodic nodes */ 3413 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3414 if (nnsp_has_cnst) { 3415 PetscScalar quad_value; 3416 3417 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3418 idxs_copied = PETSC_TRUE; 3419 3420 if (!pcbddc->use_nnsp_true) { 3421 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3422 } else { 3423 quad_value = 1.0; 3424 } 3425 for (j=0;j<size_of_constraint;j++) { 3426 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3427 } 3428 temp_constraints++; 3429 total_counts++; 3430 } 3431 for (k=0;k<nnsp_size;k++) { 3432 PetscReal real_value; 3433 PetscScalar *ptr_to_data; 3434 3435 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3436 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3437 for (j=0;j<size_of_constraint;j++) { 3438 ptr_to_data[j] = array[is_indices[j]]; 3439 } 3440 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3441 /* check if array is null on the connected component */ 3442 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3443 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3444 if (real_value > 0.0) { /* keep indices and values */ 3445 temp_constraints++; 3446 total_counts++; 3447 if (!idxs_copied) { 3448 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3449 idxs_copied = PETSC_TRUE; 3450 } 3451 } 3452 } 3453 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3454 valid_constraints = temp_constraints; 3455 if (!pcbddc->use_nnsp_true && temp_constraints) { 3456 if (temp_constraints == 1) { /* just normalize the constraint */ 3457 PetscScalar norm,*ptr_to_data; 3458 3459 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3461 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3462 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3463 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3464 } else { /* perform SVD */ 3465 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3466 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3467 3468 #if defined(PETSC_MISSING_LAPACK_GESVD) 3469 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3470 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3471 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3472 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3473 from that computed using LAPACKgesvd 3474 -> This is due to a different computation of eigenvectors in LAPACKheev 3475 -> The quality of the POD-computed basis will be the same */ 3476 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3477 /* Store upper triangular part of correlation matrix */ 3478 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3479 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3480 for (j=0;j<temp_constraints;j++) { 3481 for (k=0;k<j+1;k++) { 3482 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3483 } 3484 } 3485 /* compute eigenvalues and eigenvectors of correlation matrix */ 3486 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3487 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3488 #if !defined(PETSC_USE_COMPLEX) 3489 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3490 #else 3491 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3492 #endif 3493 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3494 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3495 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3496 j = 0; 3497 while (j < temp_constraints && singular_vals[j] < tol) j++; 3498 total_counts = total_counts-j; 3499 valid_constraints = temp_constraints-j; 3500 /* scale and copy POD basis into used quadrature memory */ 3501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3502 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3503 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3505 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3507 if (j<temp_constraints) { 3508 PetscInt ii; 3509 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3510 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3511 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3512 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3513 for (k=0;k<temp_constraints-j;k++) { 3514 for (ii=0;ii<size_of_constraint;ii++) { 3515 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3516 } 3517 } 3518 } 3519 #else /* on missing GESVD */ 3520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3521 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3523 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3524 #if !defined(PETSC_USE_COMPLEX) 3525 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3526 #else 3527 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3528 #endif 3529 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3530 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3531 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3532 k = temp_constraints; 3533 if (k > size_of_constraint) k = size_of_constraint; 3534 j = 0; 3535 while (j < k && singular_vals[k-j-1] < tol) j++; 3536 valid_constraints = k-j; 3537 total_counts = total_counts-temp_constraints+valid_constraints; 3538 #endif /* on missing GESVD */ 3539 } 3540 } 3541 /* update pointers information */ 3542 if (valid_constraints) { 3543 constraints_n[total_counts_cc] = valid_constraints; 3544 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3545 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3546 /* set change_of_basis flag */ 3547 if (boolforchange) { 3548 PetscBTSet(change_basis,total_counts_cc); 3549 } 3550 total_counts_cc++; 3551 } 3552 } 3553 /* free workspace */ 3554 if (!skip_lapack) { 3555 ierr = PetscFree(work);CHKERRQ(ierr); 3556 #if defined(PETSC_USE_COMPLEX) 3557 ierr = PetscFree(rwork);CHKERRQ(ierr); 3558 #endif 3559 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3560 #if defined(PETSC_MISSING_LAPACK_GESVD) 3561 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3562 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3563 #endif 3564 } 3565 for (k=0;k<nnsp_size;k++) { 3566 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3567 } 3568 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3569 /* free index sets of faces, edges and vertices */ 3570 for (i=0;i<n_ISForFaces;i++) { 3571 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3572 } 3573 if (n_ISForFaces) { 3574 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3575 } 3576 for (i=0;i<n_ISForEdges;i++) { 3577 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3578 } 3579 if (n_ISForEdges) { 3580 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3581 } 3582 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3583 } else { 3584 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3585 3586 total_counts = 0; 3587 n_vertices = 0; 3588 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3589 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3590 } 3591 max_constraints = 0; 3592 total_counts_cc = 0; 3593 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3594 total_counts += pcbddc->adaptive_constraints_n[i]; 3595 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3596 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3597 } 3598 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3599 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3600 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3601 constraints_data = pcbddc->adaptive_constraints_data; 3602 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3603 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3604 total_counts_cc = 0; 3605 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3606 if (pcbddc->adaptive_constraints_n[i]) { 3607 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3608 } 3609 } 3610 #if 0 3611 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3612 for (i=0;i<total_counts_cc;i++) { 3613 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3614 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3615 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3616 printf(" %d",constraints_idxs[j]); 3617 } 3618 printf("\n"); 3619 printf("number of cc: %d\n",constraints_n[i]); 3620 } 3621 for (i=0;i<n_vertices;i++) { 3622 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3623 } 3624 for (i=0;i<sub_schurs->n_subs;i++) { 3625 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3626 } 3627 #endif 3628 3629 max_size_of_constraint = 0; 3630 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3631 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3632 /* Change of basis */ 3633 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3634 if (pcbddc->use_change_of_basis) { 3635 for (i=0;i<sub_schurs->n_subs;i++) { 3636 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3637 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3638 } 3639 } 3640 } 3641 } 3642 pcbddc->local_primal_size = total_counts; 3643 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3644 3645 /* map constraints_idxs in boundary numbering */ 3646 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3647 if (i != constraints_idxs_ptr[total_counts_cc]) { 3648 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3649 } 3650 3651 /* Create constraint matrix */ 3652 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3653 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3654 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3655 3656 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3657 /* determine if a QR strategy is needed for change of basis */ 3658 qr_needed = PETSC_FALSE; 3659 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3660 total_primal_vertices=0; 3661 pcbddc->local_primal_size_cc = 0; 3662 for (i=0;i<total_counts_cc;i++) { 3663 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3664 if (size_of_constraint == 1) { 3665 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3666 pcbddc->local_primal_size_cc += 1; 3667 } else if (PetscBTLookup(change_basis,i)) { 3668 for (k=0;k<constraints_n[i];k++) { 3669 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3670 } 3671 pcbddc->local_primal_size_cc += constraints_n[i]; 3672 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3673 PetscBTSet(qr_needed_idx,i); 3674 qr_needed = PETSC_TRUE; 3675 } 3676 } else { 3677 pcbddc->local_primal_size_cc += 1; 3678 } 3679 } 3680 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3681 pcbddc->n_vertices = total_primal_vertices; 3682 /* permute indices in order to have a sorted set of vertices */ 3683 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3684 3685 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3686 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3687 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3688 3689 /* nonzero structure of constraint matrix */ 3690 /* and get reference dof for local constraints */ 3691 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3692 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3693 3694 j = total_primal_vertices; 3695 total_counts = total_primal_vertices; 3696 cum = total_primal_vertices; 3697 for (i=n_vertices;i<total_counts_cc;i++) { 3698 if (!PetscBTLookup(change_basis,i)) { 3699 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3700 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3701 cum++; 3702 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3703 for (k=0;k<constraints_n[i];k++) { 3704 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3705 nnz[j+k] = size_of_constraint; 3706 } 3707 j += constraints_n[i]; 3708 } 3709 } 3710 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3711 ierr = PetscFree(nnz);CHKERRQ(ierr); 3712 3713 /* set values in constraint matrix */ 3714 for (i=0;i<total_primal_vertices;i++) { 3715 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3716 } 3717 total_counts = total_primal_vertices; 3718 for (i=n_vertices;i<total_counts_cc;i++) { 3719 if (!PetscBTLookup(change_basis,i)) { 3720 PetscInt *cols; 3721 3722 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3723 cols = constraints_idxs+constraints_idxs_ptr[i]; 3724 for (k=0;k<constraints_n[i];k++) { 3725 PetscInt row = total_counts+k; 3726 PetscScalar *vals; 3727 3728 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3729 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3730 } 3731 total_counts += constraints_n[i]; 3732 } 3733 } 3734 /* assembling */ 3735 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3737 3738 /* 3739 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3740 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3741 */ 3742 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3743 if (pcbddc->use_change_of_basis) { 3744 /* dual and primal dofs on a single cc */ 3745 PetscInt dual_dofs,primal_dofs; 3746 /* working stuff for GEQRF */ 3747 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3748 PetscBLASInt lqr_work; 3749 /* working stuff for UNGQR */ 3750 PetscScalar *gqr_work,lgqr_work_t; 3751 PetscBLASInt lgqr_work; 3752 /* working stuff for TRTRS */ 3753 PetscScalar *trs_rhs; 3754 PetscBLASInt Blas_NRHS; 3755 /* pointers for values insertion into change of basis matrix */ 3756 PetscInt *start_rows,*start_cols; 3757 PetscScalar *start_vals; 3758 /* working stuff for values insertion */ 3759 PetscBT is_primal; 3760 PetscInt *aux_primal_numbering_B; 3761 /* matrix sizes */ 3762 PetscInt global_size,local_size; 3763 /* temporary change of basis */ 3764 Mat localChangeOfBasisMatrix; 3765 /* extra space for debugging */ 3766 PetscScalar *dbg_work; 3767 3768 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3769 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3770 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3771 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3772 /* nonzeros for local mat */ 3773 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3774 for (i=0;i<pcis->n;i++) nnz[i]=1; 3775 for (i=n_vertices;i<total_counts_cc;i++) { 3776 if (PetscBTLookup(change_basis,i)) { 3777 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3778 if (PetscBTLookup(qr_needed_idx,i)) { 3779 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3780 } else { 3781 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3782 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3783 } 3784 } 3785 } 3786 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3787 ierr = PetscFree(nnz);CHKERRQ(ierr); 3788 /* Set initial identity in the matrix */ 3789 for (i=0;i<pcis->n;i++) { 3790 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3791 } 3792 3793 if (pcbddc->dbg_flag) { 3794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3796 } 3797 3798 3799 /* Now we loop on the constraints which need a change of basis */ 3800 /* 3801 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3802 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3803 3804 Basic blocks of change of basis matrix T computed by 3805 3806 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3807 3808 | 1 0 ... 0 s_1/S | 3809 | 0 1 ... 0 s_2/S | 3810 | ... | 3811 | 0 ... 1 s_{n-1}/S | 3812 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3813 3814 with S = \sum_{i=1}^n s_i^2 3815 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3816 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3817 3818 - QR decomposition of constraints otherwise 3819 */ 3820 if (qr_needed) { 3821 /* space to store Q */ 3822 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3823 /* first we issue queries for optimal work */ 3824 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3825 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3826 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3827 lqr_work = -1; 3828 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3829 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3830 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3831 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3832 lgqr_work = -1; 3833 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3834 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3835 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3836 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3837 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3838 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3839 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3840 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3841 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3842 /* array to store scaling factors for reflectors */ 3843 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3844 /* array to store rhs and solution of triangular solver */ 3845 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3846 /* allocating workspace for check */ 3847 if (pcbddc->dbg_flag) { 3848 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3849 } 3850 } 3851 /* array to store whether a node is primal or not */ 3852 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3853 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3854 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3855 if (i != total_primal_vertices) { 3856 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3857 } 3858 for (i=0;i<total_primal_vertices;i++) { 3859 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3860 } 3861 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3862 3863 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3864 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3865 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3866 if (PetscBTLookup(change_basis,total_counts)) { 3867 /* get constraint info */ 3868 primal_dofs = constraints_n[total_counts]; 3869 dual_dofs = size_of_constraint-primal_dofs; 3870 3871 if (pcbddc->dbg_flag) { 3872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3873 } 3874 3875 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3876 3877 /* copy quadrature constraints for change of basis check */ 3878 if (pcbddc->dbg_flag) { 3879 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3880 } 3881 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3882 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3883 3884 /* compute QR decomposition of constraints */ 3885 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3886 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3887 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3888 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3889 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3890 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3891 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3892 3893 /* explictly compute R^-T */ 3894 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3895 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3896 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3897 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3899 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3900 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3901 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3902 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3904 3905 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3906 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3907 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3908 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3910 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3911 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3912 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3913 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3914 3915 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3916 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3917 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3919 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3920 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3922 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3924 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3925 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3926 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3927 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3928 3929 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3930 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3931 /* insert cols for primal dofs */ 3932 for (j=0;j<primal_dofs;j++) { 3933 start_vals = &qr_basis[j*size_of_constraint]; 3934 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3935 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3936 } 3937 /* insert cols for dual dofs */ 3938 for (j=0,k=0;j<dual_dofs;k++) { 3939 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3940 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3941 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3942 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3943 j++; 3944 } 3945 } 3946 3947 /* check change of basis */ 3948 if (pcbddc->dbg_flag) { 3949 PetscInt ii,jj; 3950 PetscBool valid_qr=PETSC_TRUE; 3951 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3952 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3953 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3957 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3958 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3959 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3960 for (jj=0;jj<size_of_constraint;jj++) { 3961 for (ii=0;ii<primal_dofs;ii++) { 3962 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3963 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3964 } 3965 } 3966 if (!valid_qr) { 3967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3968 for (jj=0;jj<size_of_constraint;jj++) { 3969 for (ii=0;ii<primal_dofs;ii++) { 3970 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3971 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3972 } 3973 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3974 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3975 } 3976 } 3977 } 3978 } else { 3979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3980 } 3981 } 3982 } else { /* simple transformation block */ 3983 PetscInt row,col; 3984 PetscScalar val,norm; 3985 3986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3987 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3988 for (j=0;j<size_of_constraint;j++) { 3989 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3990 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3991 if (!PetscBTLookup(is_primal,row_B)) { 3992 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3993 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3994 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3995 } else { 3996 for (k=0;k<size_of_constraint;k++) { 3997 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3998 if (row != col) { 3999 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4000 } else { 4001 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4002 } 4003 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4004 } 4005 } 4006 } 4007 if (pcbddc->dbg_flag) { 4008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4009 } 4010 } 4011 } else { 4012 if (pcbddc->dbg_flag) { 4013 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4014 } 4015 } 4016 } 4017 4018 /* free workspace */ 4019 if (qr_needed) { 4020 if (pcbddc->dbg_flag) { 4021 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4022 } 4023 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4024 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4025 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4026 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4027 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4028 } 4029 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4030 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4031 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4032 4033 /* assembling of global change of variable */ 4034 { 4035 Mat tmat; 4036 PetscInt bs; 4037 4038 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4039 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4040 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4041 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4042 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4043 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4044 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4045 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4046 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4047 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4048 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4049 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4050 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4051 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4052 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4053 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4054 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4055 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4056 } 4057 /* check */ 4058 if (pcbddc->dbg_flag) { 4059 PetscReal error; 4060 Vec x,x_change; 4061 4062 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4063 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4064 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4065 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4066 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4067 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4068 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4069 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4070 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4071 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4072 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4073 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4075 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4076 ierr = VecDestroy(&x);CHKERRQ(ierr); 4077 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4078 } 4079 4080 /* adapt sub_schurs computed (if any) */ 4081 if (pcbddc->use_deluxe_scaling) { 4082 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4083 if (sub_schurs->S_Ej_all) { 4084 Mat S_new,tmat; 4085 ISLocalToGlobalMapping NtoSall; 4086 IS is_all_N,is_V,is_V_Sall; 4087 const PetscScalar *array; 4088 const PetscInt *idxs_V,*idxs_all; 4089 PetscInt i,n_V; 4090 4091 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4092 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4093 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4094 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4095 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4096 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4097 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4098 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4099 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4100 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4101 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4102 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4103 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4104 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4105 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4106 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4107 for (i=0;i<n_V;i++) { 4108 PetscScalar val; 4109 PetscInt idx; 4110 4111 idx = idxs_V[i]; 4112 val = array[idxs_all[idxs_V[i]]]; 4113 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4114 } 4115 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4116 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4117 sub_schurs->S_Ej_all = S_new; 4118 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4119 if (sub_schurs->sum_S_Ej_all) { 4120 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4121 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4122 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4123 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4124 sub_schurs->sum_S_Ej_all = S_new; 4125 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4126 } 4127 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4128 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4129 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4130 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4131 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4132 } 4133 } 4134 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4135 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4136 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4137 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4138 } 4139 4140 /* set up change of basis context */ 4141 if (pcbddc->ChangeOfBasisMatrix) { 4142 PCBDDCChange_ctx change_ctx; 4143 4144 if (!pcbddc->new_global_mat) { 4145 PetscInt global_size,local_size; 4146 4147 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4148 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4149 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4150 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4151 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4152 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4153 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4154 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4155 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4156 } else { 4157 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4158 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4159 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4160 } 4161 if (!pcbddc->user_ChangeOfBasisMatrix) { 4162 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4163 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4164 } else { 4165 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4166 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4167 } 4168 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4169 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4170 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4171 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4172 } else { 4173 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4174 } 4175 4176 /* add pressure dofs to set of primal nodes for numbering purposes */ 4177 for (i=0;i<pcbddc->benign_n;i++) { 4178 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4179 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4180 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4181 pcbddc->local_primal_size_cc++; 4182 pcbddc->local_primal_size++; 4183 } 4184 4185 /* check if a new primal space has been introduced (also take into account benign trick) */ 4186 pcbddc->new_primal_space_local = PETSC_TRUE; 4187 if (olocal_primal_size == pcbddc->local_primal_size) { 4188 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4189 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4190 if (!pcbddc->new_primal_space_local) { 4191 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4192 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4193 } 4194 } 4195 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4196 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4197 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4198 4199 /* flush dbg viewer */ 4200 if (pcbddc->dbg_flag) { 4201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4202 } 4203 4204 /* free workspace */ 4205 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4206 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4207 if (!pcbddc->adaptive_selection) { 4208 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4209 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4210 } else { 4211 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4212 pcbddc->adaptive_constraints_idxs_ptr, 4213 pcbddc->adaptive_constraints_data_ptr, 4214 pcbddc->adaptive_constraints_idxs, 4215 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4216 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4217 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4218 } 4219 PetscFunctionReturn(0); 4220 } 4221 4222 #undef __FUNCT__ 4223 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4224 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4225 { 4226 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4227 PC_IS *pcis = (PC_IS*)pc->data; 4228 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4229 PetscInt ierr,i,vertex_size,N; 4230 PetscViewer viewer=pcbddc->dbg_viewer; 4231 4232 PetscFunctionBegin; 4233 /* Reset previously computed graph */ 4234 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4235 /* Init local Graph struct */ 4236 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4237 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4238 4239 /* Check validity of the csr graph passed in by the user */ 4240 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4241 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4242 } 4243 4244 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4245 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4246 PetscInt *xadj,*adjncy; 4247 PetscInt nvtxs; 4248 PetscBool flg_row=PETSC_FALSE; 4249 4250 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4251 if (flg_row) { 4252 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4253 pcbddc->computed_rowadj = PETSC_TRUE; 4254 } 4255 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4256 } 4257 if (pcbddc->dbg_flag) { 4258 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4259 } 4260 4261 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4262 vertex_size = 1; 4263 if (pcbddc->user_provided_isfordofs) { 4264 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4265 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4266 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4267 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4268 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4269 } 4270 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4271 pcbddc->n_ISForDofs = 0; 4272 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4273 } 4274 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4275 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4276 } else { 4277 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4278 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4279 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4280 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4281 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4282 } 4283 } 4284 } 4285 4286 /* Setup of Graph */ 4287 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4288 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4289 } 4290 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4291 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4292 } 4293 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4294 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4295 } 4296 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4297 4298 /* attach info on disconnected subdomains if present */ 4299 if (pcbddc->n_local_subs) { 4300 PetscInt *local_subs; 4301 4302 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4303 for (i=0;i<pcbddc->n_local_subs;i++) { 4304 const PetscInt *idxs; 4305 PetscInt nl,j; 4306 4307 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4308 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4309 for (j=0;j<nl;j++) { 4310 local_subs[idxs[j]] = i; 4311 } 4312 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4313 } 4314 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4315 pcbddc->mat_graph->local_subs = local_subs; 4316 } 4317 4318 /* Graph's connected components analysis */ 4319 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4320 4321 /* print some info to stdout */ 4322 if (pcbddc->dbg_flag) { 4323 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4324 } 4325 4326 /* mark topography has done */ 4327 pcbddc->recompute_topography = PETSC_FALSE; 4328 PetscFunctionReturn(0); 4329 } 4330 4331 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4332 #undef __FUNCT__ 4333 #define __FUNCT__ "PCBDDCSubsetNumbering" 4334 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4335 { 4336 PetscSF sf; 4337 PetscLayout map; 4338 const PetscInt *idxs; 4339 PetscInt *leaf_data,*root_data,*gidxs; 4340 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4341 PetscInt n_n,nlocals,start,first_index; 4342 PetscMPIInt commsize; 4343 PetscBool first_found; 4344 PetscErrorCode ierr; 4345 4346 PetscFunctionBegin; 4347 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4348 if (subset_mult) { 4349 PetscCheckSameComm(subset,1,subset_mult,2); 4350 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4351 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4352 } 4353 /* create workspace layout for computing global indices of subset */ 4354 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4355 lbounds[0] = lbounds[1] = 0; 4356 for (i=0;i<n;i++) { 4357 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4358 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4359 } 4360 lbounds[0] = -lbounds[0]; 4361 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4362 gbounds[0] = -gbounds[0]; 4363 N = gbounds[1] - gbounds[0] + 1; 4364 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4365 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4366 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4367 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4368 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4369 4370 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4371 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4372 if (subset_mult) { 4373 const PetscInt* idxs_mult; 4374 4375 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4376 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4377 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4378 } else { 4379 for (i=0;i<n;i++) leaf_data[i] = 1; 4380 } 4381 /* local size of new subset */ 4382 n_n = 0; 4383 for (i=0;i<n;i++) n_n += leaf_data[i]; 4384 4385 /* global indexes in layout */ 4386 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4387 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4388 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4389 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4390 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4391 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4392 4393 /* reduce from leaves to roots */ 4394 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4395 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4396 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4397 4398 /* count indexes in local part of layout */ 4399 nlocals = 0; 4400 first_index = -1; 4401 first_found = PETSC_FALSE; 4402 for (i=0;i<Nl;i++) { 4403 if (!first_found && root_data[i]) { 4404 first_found = PETSC_TRUE; 4405 first_index = i; 4406 } 4407 nlocals += root_data[i]; 4408 } 4409 4410 /* cumulative of number of indexes and size of subset without holes */ 4411 #if defined(PETSC_HAVE_MPI_EXSCAN) 4412 start = 0; 4413 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4414 #else 4415 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4416 start = start-nlocals; 4417 #endif 4418 4419 if (N_n) { /* compute total size of new subset if requested */ 4420 *N_n = start + nlocals; 4421 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4422 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4423 } 4424 4425 /* adapt root data with cumulative */ 4426 if (first_found) { 4427 PetscInt old_index; 4428 4429 root_data[first_index] += start; 4430 old_index = first_index; 4431 for (i=first_index+1;i<Nl;i++) { 4432 if (root_data[i]) { 4433 root_data[i] += root_data[old_index]; 4434 old_index = i; 4435 } 4436 } 4437 } 4438 4439 /* from roots to leaves */ 4440 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4441 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4442 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4443 4444 /* create new IS with global indexes without holes */ 4445 if (subset_mult) { 4446 const PetscInt* idxs_mult; 4447 PetscInt cum; 4448 4449 cum = 0; 4450 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4451 for (i=0;i<n;i++) { 4452 PetscInt j; 4453 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4454 } 4455 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4456 } else { 4457 for (i=0;i<n;i++) { 4458 gidxs[i] = leaf_data[i]-1; 4459 } 4460 } 4461 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4462 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4468 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4469 { 4470 PetscInt i,j; 4471 PetscScalar *alphas; 4472 PetscErrorCode ierr; 4473 4474 PetscFunctionBegin; 4475 /* this implements stabilized Gram-Schmidt */ 4476 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4477 for (i=0;i<n;i++) { 4478 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4479 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4480 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4481 } 4482 ierr = PetscFree(alphas);CHKERRQ(ierr); 4483 PetscFunctionReturn(0); 4484 } 4485 4486 #undef __FUNCT__ 4487 #define __FUNCT__ "MatISGetSubassemblingPattern" 4488 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4489 { 4490 IS ranks_send_to; 4491 PetscInt n_neighs,*neighs,*n_shared,**shared; 4492 PetscMPIInt size,rank,color; 4493 PetscInt *xadj,*adjncy; 4494 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4495 PetscInt i,local_size,threshold=0; 4496 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4497 PetscSubcomm subcomm; 4498 PetscErrorCode ierr; 4499 4500 PetscFunctionBegin; 4501 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4502 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4503 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4504 4505 /* Get info on mapping */ 4506 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4507 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4508 4509 /* build local CSR graph of subdomains' connectivity */ 4510 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4511 xadj[0] = 0; 4512 xadj[1] = PetscMax(n_neighs-1,0); 4513 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4515 4516 if (threshold) { 4517 PetscInt xadj_count = 0; 4518 for (i=1;i<n_neighs;i++) { 4519 if (n_shared[i] > threshold) { 4520 adjncy[xadj_count] = neighs[i]; 4521 adjncy_wgt[xadj_count] = n_shared[i]; 4522 xadj_count++; 4523 } 4524 } 4525 xadj[1] = xadj_count; 4526 } else { 4527 if (xadj[1]) { 4528 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4529 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4530 } 4531 } 4532 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4533 if (use_square) { 4534 for (i=0;i<xadj[1];i++) { 4535 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4536 } 4537 } 4538 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4539 4540 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4541 4542 /* 4543 Restrict work on active processes only. 4544 */ 4545 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4546 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4547 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4548 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4549 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4550 if (color) { 4551 ierr = PetscFree(xadj);CHKERRQ(ierr); 4552 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4553 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4554 } else { 4555 Mat subdomain_adj; 4556 IS new_ranks,new_ranks_contig; 4557 MatPartitioning partitioner; 4558 PetscInt prank,rstart=0,rend=0; 4559 PetscInt *is_indices,*oldranks; 4560 PetscBool aggregate; 4561 4562 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4563 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4564 prank = rank; 4565 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4566 /* 4567 for (i=0;i<size;i++) { 4568 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4569 } 4570 */ 4571 for (i=0;i<xadj[1];i++) { 4572 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4573 } 4574 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4575 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4576 if (aggregate) { 4577 PetscInt lrows,row,ncols,*cols; 4578 PetscMPIInt nrank; 4579 PetscScalar *vals; 4580 4581 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4582 lrows = 0; 4583 if (nrank<redprocs) { 4584 lrows = size/redprocs; 4585 if (nrank<size%redprocs) lrows++; 4586 } 4587 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4588 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4589 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4590 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4591 row = nrank; 4592 ncols = xadj[1]-xadj[0]; 4593 cols = adjncy; 4594 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4595 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4596 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4597 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 ierr = PetscFree(xadj);CHKERRQ(ierr); 4600 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4601 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4602 ierr = PetscFree(vals);CHKERRQ(ierr); 4603 } else { 4604 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4605 } 4606 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4607 4608 /* Partition */ 4609 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4610 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4611 if (use_vwgt) { 4612 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4613 v_wgt[0] = local_size; 4614 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4615 } 4616 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4617 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4618 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4619 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4620 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4621 4622 /* renumber new_ranks to avoid "holes" in new set of processors */ 4623 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4624 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4625 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4626 if (!redprocs) { 4627 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4628 } else { 4629 PetscInt idxs[1]; 4630 PetscMPIInt tag; 4631 MPI_Request *reqs; 4632 4633 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4634 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4635 for (i=rstart;i<rend;i++) { 4636 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4637 } 4638 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4639 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4640 ierr = PetscFree(reqs);CHKERRQ(ierr); 4641 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4642 } 4643 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4644 /* clean up */ 4645 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4646 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4647 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4648 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4649 } 4650 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4651 4652 /* assemble parallel IS for sends */ 4653 i = 1; 4654 if (color) i=0; 4655 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4656 /* get back IS */ 4657 *is_sends = ranks_send_to; 4658 PetscFunctionReturn(0); 4659 } 4660 4661 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4662 4663 #undef __FUNCT__ 4664 #define __FUNCT__ "MatISSubassemble" 4665 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4666 { 4667 Mat local_mat; 4668 IS is_sends_internal; 4669 PetscInt rows,cols,new_local_rows; 4670 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4671 PetscBool ismatis,isdense,newisdense,destroy_mat; 4672 ISLocalToGlobalMapping l2gmap; 4673 PetscInt* l2gmap_indices; 4674 const PetscInt* is_indices; 4675 MatType new_local_type; 4676 /* buffers */ 4677 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4678 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4679 PetscInt *recv_buffer_idxs_local; 4680 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4681 /* MPI */ 4682 MPI_Comm comm,comm_n; 4683 PetscSubcomm subcomm; 4684 PetscMPIInt n_sends,n_recvs,commsize; 4685 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4686 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4687 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4688 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4689 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4690 PetscErrorCode ierr; 4691 4692 PetscFunctionBegin; 4693 /* TODO: add missing checks */ 4694 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4695 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4696 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4697 PetscValidLogicalCollectiveInt(mat,nis,7); 4698 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4699 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4700 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4701 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4702 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4703 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4704 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4705 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4706 PetscInt mrows,mcols,mnrows,mncols; 4707 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4708 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4709 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4710 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4711 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4712 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4713 } 4714 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4715 PetscValidLogicalCollectiveInt(mat,bs,0); 4716 /* prepare IS for sending if not provided */ 4717 if (!is_sends) { 4718 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4719 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4720 } else { 4721 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4722 is_sends_internal = is_sends; 4723 } 4724 4725 /* get comm */ 4726 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4727 4728 /* compute number of sends */ 4729 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4730 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4731 4732 /* compute number of receives */ 4733 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4734 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4735 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4736 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4737 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4738 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4739 ierr = PetscFree(iflags);CHKERRQ(ierr); 4740 4741 /* restrict comm if requested */ 4742 subcomm = 0; 4743 destroy_mat = PETSC_FALSE; 4744 if (restrict_comm) { 4745 PetscMPIInt color,subcommsize; 4746 4747 color = 0; 4748 if (restrict_full) { 4749 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4750 } else { 4751 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4752 } 4753 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4754 subcommsize = commsize - subcommsize; 4755 /* check if reuse has been requested */ 4756 if (reuse == MAT_REUSE_MATRIX) { 4757 if (*mat_n) { 4758 PetscMPIInt subcommsize2; 4759 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4760 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4761 comm_n = PetscObjectComm((PetscObject)*mat_n); 4762 } else { 4763 comm_n = PETSC_COMM_SELF; 4764 } 4765 } else { /* MAT_INITIAL_MATRIX */ 4766 PetscMPIInt rank; 4767 4768 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4769 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4770 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4771 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4772 comm_n = PetscSubcommChild(subcomm); 4773 } 4774 /* flag to destroy *mat_n if not significative */ 4775 if (color) destroy_mat = PETSC_TRUE; 4776 } else { 4777 comm_n = comm; 4778 } 4779 4780 /* prepare send/receive buffers */ 4781 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4782 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4783 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4784 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4785 if (nis) { 4786 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4787 } 4788 4789 /* Get data from local matrices */ 4790 if (!isdense) { 4791 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4792 /* TODO: See below some guidelines on how to prepare the local buffers */ 4793 /* 4794 send_buffer_vals should contain the raw values of the local matrix 4795 send_buffer_idxs should contain: 4796 - MatType_PRIVATE type 4797 - PetscInt size_of_l2gmap 4798 - PetscInt global_row_indices[size_of_l2gmap] 4799 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4800 */ 4801 } else { 4802 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4803 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4804 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4805 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4806 send_buffer_idxs[1] = i; 4807 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4808 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4809 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4810 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4811 for (i=0;i<n_sends;i++) { 4812 ilengths_vals[is_indices[i]] = len*len; 4813 ilengths_idxs[is_indices[i]] = len+2; 4814 } 4815 } 4816 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4817 /* additional is (if any) */ 4818 if (nis) { 4819 PetscMPIInt psum; 4820 PetscInt j; 4821 for (j=0,psum=0;j<nis;j++) { 4822 PetscInt plen; 4823 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4824 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4825 psum += len+1; /* indices + lenght */ 4826 } 4827 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4828 for (j=0,psum=0;j<nis;j++) { 4829 PetscInt plen; 4830 const PetscInt *is_array_idxs; 4831 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4832 send_buffer_idxs_is[psum] = plen; 4833 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4834 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4835 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4836 psum += plen+1; /* indices + lenght */ 4837 } 4838 for (i=0;i<n_sends;i++) { 4839 ilengths_idxs_is[is_indices[i]] = psum; 4840 } 4841 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4842 } 4843 4844 buf_size_idxs = 0; 4845 buf_size_vals = 0; 4846 buf_size_idxs_is = 0; 4847 for (i=0;i<n_recvs;i++) { 4848 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4849 buf_size_vals += (PetscInt)olengths_vals[i]; 4850 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4851 } 4852 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4853 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4854 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4855 4856 /* get new tags for clean communications */ 4857 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4858 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4859 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4860 4861 /* allocate for requests */ 4862 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4863 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4864 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4865 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4866 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4867 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4868 4869 /* communications */ 4870 ptr_idxs = recv_buffer_idxs; 4871 ptr_vals = recv_buffer_vals; 4872 ptr_idxs_is = recv_buffer_idxs_is; 4873 for (i=0;i<n_recvs;i++) { 4874 source_dest = onodes[i]; 4875 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4876 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4877 ptr_idxs += olengths_idxs[i]; 4878 ptr_vals += olengths_vals[i]; 4879 if (nis) { 4880 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4881 ptr_idxs_is += olengths_idxs_is[i]; 4882 } 4883 } 4884 for (i=0;i<n_sends;i++) { 4885 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4886 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4887 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4888 if (nis) { 4889 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4890 } 4891 } 4892 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4893 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4894 4895 /* assemble new l2g map */ 4896 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4897 ptr_idxs = recv_buffer_idxs; 4898 new_local_rows = 0; 4899 for (i=0;i<n_recvs;i++) { 4900 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4901 ptr_idxs += olengths_idxs[i]; 4902 } 4903 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4904 ptr_idxs = recv_buffer_idxs; 4905 new_local_rows = 0; 4906 for (i=0;i<n_recvs;i++) { 4907 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4908 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4909 ptr_idxs += olengths_idxs[i]; 4910 } 4911 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4912 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4913 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4914 4915 /* infer new local matrix type from received local matrices type */ 4916 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4917 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4918 if (n_recvs) { 4919 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4920 ptr_idxs = recv_buffer_idxs; 4921 for (i=0;i<n_recvs;i++) { 4922 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4923 new_local_type_private = MATAIJ_PRIVATE; 4924 break; 4925 } 4926 ptr_idxs += olengths_idxs[i]; 4927 } 4928 switch (new_local_type_private) { 4929 case MATDENSE_PRIVATE: 4930 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4931 new_local_type = MATSEQAIJ; 4932 bs = 1; 4933 } else { /* if I receive only 1 dense matrix */ 4934 new_local_type = MATSEQDENSE; 4935 bs = 1; 4936 } 4937 break; 4938 case MATAIJ_PRIVATE: 4939 new_local_type = MATSEQAIJ; 4940 bs = 1; 4941 break; 4942 case MATBAIJ_PRIVATE: 4943 new_local_type = MATSEQBAIJ; 4944 break; 4945 case MATSBAIJ_PRIVATE: 4946 new_local_type = MATSEQSBAIJ; 4947 break; 4948 default: 4949 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4950 break; 4951 } 4952 } else { /* by default, new_local_type is seqdense */ 4953 new_local_type = MATSEQDENSE; 4954 bs = 1; 4955 } 4956 4957 /* create MATIS object if needed */ 4958 if (reuse == MAT_INITIAL_MATRIX) { 4959 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4960 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4961 } else { 4962 /* it also destroys the local matrices */ 4963 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4964 } 4965 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4966 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4967 4968 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4969 4970 /* Global to local map of received indices */ 4971 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4972 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4973 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4974 4975 /* restore attributes -> type of incoming data and its size */ 4976 buf_size_idxs = 0; 4977 for (i=0;i<n_recvs;i++) { 4978 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4979 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4980 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4981 } 4982 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4983 4984 /* set preallocation */ 4985 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4986 if (!newisdense) { 4987 PetscInt *new_local_nnz=0; 4988 4989 ptr_vals = recv_buffer_vals; 4990 ptr_idxs = recv_buffer_idxs_local; 4991 if (n_recvs) { 4992 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4993 } 4994 for (i=0;i<n_recvs;i++) { 4995 PetscInt j; 4996 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4997 for (j=0;j<*(ptr_idxs+1);j++) { 4998 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4999 } 5000 } else { 5001 /* TODO */ 5002 } 5003 ptr_idxs += olengths_idxs[i]; 5004 } 5005 if (new_local_nnz) { 5006 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5007 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5008 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5009 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5010 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5011 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5012 } else { 5013 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5014 } 5015 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5016 } else { 5017 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5018 } 5019 5020 /* set values */ 5021 ptr_vals = recv_buffer_vals; 5022 ptr_idxs = recv_buffer_idxs_local; 5023 for (i=0;i<n_recvs;i++) { 5024 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5025 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5026 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5027 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5028 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5029 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5030 } else { 5031 /* TODO */ 5032 } 5033 ptr_idxs += olengths_idxs[i]; 5034 ptr_vals += olengths_vals[i]; 5035 } 5036 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5037 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5038 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5039 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5040 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5041 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5042 5043 #if 0 5044 if (!restrict_comm) { /* check */ 5045 Vec lvec,rvec; 5046 PetscReal infty_error; 5047 5048 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5049 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5050 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5051 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5052 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5053 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5054 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5055 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5056 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5057 } 5058 #endif 5059 5060 /* assemble new additional is (if any) */ 5061 if (nis) { 5062 PetscInt **temp_idxs,*count_is,j,psum; 5063 5064 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5065 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5066 ptr_idxs = recv_buffer_idxs_is; 5067 psum = 0; 5068 for (i=0;i<n_recvs;i++) { 5069 for (j=0;j<nis;j++) { 5070 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5071 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5072 psum += plen; 5073 ptr_idxs += plen+1; /* shift pointer to received data */ 5074 } 5075 } 5076 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5077 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5078 for (i=1;i<nis;i++) { 5079 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5080 } 5081 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5082 ptr_idxs = recv_buffer_idxs_is; 5083 for (i=0;i<n_recvs;i++) { 5084 for (j=0;j<nis;j++) { 5085 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5086 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5087 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5088 ptr_idxs += plen+1; /* shift pointer to received data */ 5089 } 5090 } 5091 for (i=0;i<nis;i++) { 5092 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5093 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5094 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5095 } 5096 ierr = PetscFree(count_is);CHKERRQ(ierr); 5097 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5098 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5099 } 5100 /* free workspace */ 5101 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5102 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5103 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5104 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5105 if (isdense) { 5106 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5107 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5108 } else { 5109 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5110 } 5111 if (nis) { 5112 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5113 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5114 } 5115 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5116 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5117 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5118 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5119 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5120 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5121 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5122 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5123 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5124 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5125 ierr = PetscFree(onodes);CHKERRQ(ierr); 5126 if (nis) { 5127 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5128 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5129 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5130 } 5131 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5132 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5133 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5134 for (i=0;i<nis;i++) { 5135 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5136 } 5137 *mat_n = NULL; 5138 } 5139 PetscFunctionReturn(0); 5140 } 5141 5142 /* temporary hack into ksp private data structure */ 5143 #include <petsc/private/kspimpl.h> 5144 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5147 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5148 { 5149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5150 PC_IS *pcis = (PC_IS*)pc->data; 5151 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5152 MatNullSpace CoarseNullSpace=NULL; 5153 ISLocalToGlobalMapping coarse_islg; 5154 IS coarse_is,*isarray; 5155 PetscInt i,im_active=-1,active_procs=-1; 5156 PetscInt nis,nisdofs,nisneu,nisvert; 5157 PC pc_temp; 5158 PCType coarse_pc_type; 5159 KSPType coarse_ksp_type; 5160 PetscBool multilevel_requested,multilevel_allowed; 5161 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5162 Mat t_coarse_mat_is; 5163 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5164 PetscMPIInt all_procs; 5165 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5166 PetscBool compute_vecs = PETSC_FALSE; 5167 PetscScalar *array; 5168 PetscErrorCode ierr; 5169 5170 PetscFunctionBegin; 5171 /* Assign global numbering to coarse dofs */ 5172 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5173 PetscInt ocoarse_size; 5174 compute_vecs = PETSC_TRUE; 5175 ocoarse_size = pcbddc->coarse_size; 5176 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5177 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5178 /* see if we can avoid some work */ 5179 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5180 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5181 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5182 PC pc; 5183 PetscBool isbddc; 5184 5185 /* temporary workaround since PCBDDC does not have a reset method so far */ 5186 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5187 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5188 if (isbddc) { 5189 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5190 } else { 5191 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5192 } 5193 coarse_reuse = PETSC_FALSE; 5194 } else { /* we can safely reuse already computed coarse matrix */ 5195 coarse_reuse = PETSC_TRUE; 5196 } 5197 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5198 coarse_reuse = PETSC_FALSE; 5199 } 5200 /* reset any subassembling information */ 5201 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5202 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5203 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5204 coarse_reuse = PETSC_TRUE; 5205 } 5206 5207 /* count "active" (i.e. with positive local size) and "void" processes */ 5208 im_active = !!(pcis->n); 5209 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5210 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5211 void_procs = all_procs-active_procs; 5212 csin_type_simple = PETSC_TRUE; 5213 redist = PETSC_FALSE; 5214 if (pcbddc->current_level && void_procs) { 5215 csin_ml = PETSC_TRUE; 5216 ncoarse_ml = void_procs; 5217 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5218 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5219 csin_ds = PETSC_TRUE; 5220 ncoarse_ds = pcbddc->redistribute_coarse; 5221 redist = PETSC_TRUE; 5222 } else { 5223 csin_ds = PETSC_TRUE; 5224 ncoarse_ds = active_procs; 5225 redist = PETSC_TRUE; 5226 } 5227 } else { 5228 csin_ml = PETSC_FALSE; 5229 ncoarse_ml = all_procs; 5230 if (void_procs) { 5231 csin_ds = PETSC_TRUE; 5232 ncoarse_ds = void_procs; 5233 csin_type_simple = PETSC_FALSE; 5234 } else { 5235 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5236 csin_ds = PETSC_TRUE; 5237 ncoarse_ds = pcbddc->redistribute_coarse; 5238 redist = PETSC_TRUE; 5239 } else { 5240 csin_ds = PETSC_FALSE; 5241 ncoarse_ds = all_procs; 5242 } 5243 } 5244 } 5245 5246 /* 5247 test if we can go multilevel: three conditions must be satisfied: 5248 - we have not exceeded the number of levels requested 5249 - we can actually subassemble the active processes 5250 - we can find a suitable number of MPI processes where we can place the subassembled problem 5251 */ 5252 multilevel_allowed = PETSC_FALSE; 5253 multilevel_requested = PETSC_FALSE; 5254 if (pcbddc->current_level < pcbddc->max_levels) { 5255 multilevel_requested = PETSC_TRUE; 5256 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5257 multilevel_allowed = PETSC_FALSE; 5258 } else { 5259 multilevel_allowed = PETSC_TRUE; 5260 } 5261 } 5262 /* determine number of process partecipating to coarse solver */ 5263 if (multilevel_allowed) { 5264 ncoarse = ncoarse_ml; 5265 csin = csin_ml; 5266 redist = PETSC_FALSE; 5267 } else { 5268 ncoarse = ncoarse_ds; 5269 csin = csin_ds; 5270 } 5271 5272 /* creates temporary l2gmap and IS for coarse indexes */ 5273 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5274 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5275 5276 /* creates temporary MATIS object for coarse matrix */ 5277 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5278 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5279 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5280 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5281 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5282 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5283 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5284 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5285 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5286 5287 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5288 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5289 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5290 const PetscInt *idxs; 5291 ISLocalToGlobalMapping tmap; 5292 5293 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5294 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5295 /* allocate space for temporary storage */ 5296 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5297 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5298 /* allocate for IS array */ 5299 nisdofs = pcbddc->n_ISForDofsLocal; 5300 nisneu = !!pcbddc->NeumannBoundariesLocal; 5301 nisvert = 0; 5302 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5303 nisvert = 1; 5304 } 5305 nis = nisdofs + nisneu + nisvert; 5306 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5307 /* dofs splitting */ 5308 for (i=0;i<nisdofs;i++) { 5309 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5310 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5311 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5312 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5313 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5314 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5315 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5316 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5317 } 5318 /* neumann boundaries */ 5319 if (pcbddc->NeumannBoundariesLocal) { 5320 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5321 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5322 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5323 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5324 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5325 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5327 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5328 } 5329 /* primal vertices (benign) */ 5330 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5331 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5332 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5333 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5334 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5335 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5336 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5337 } 5338 /* free memory */ 5339 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5340 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5341 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5342 } else { 5343 nis = 0; 5344 nisdofs = 0; 5345 nisneu = 0; 5346 nisvert = 0; 5347 isarray = NULL; 5348 } 5349 /* destroy no longer needed map */ 5350 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5351 5352 /* restrict on coarse candidates (if needed) */ 5353 coarse_mat_is = NULL; 5354 if (csin) { 5355 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5356 if (redist) { 5357 PetscMPIInt rank; 5358 PetscInt spc,n_spc_p1,dest[1],destsize; 5359 5360 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5361 spc = active_procs/ncoarse; 5362 n_spc_p1 = active_procs%ncoarse; 5363 if (im_active) { 5364 destsize = 1; 5365 if (rank > n_spc_p1*(spc+1)-1) { 5366 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5367 } else { 5368 dest[0] = rank/(spc+1); 5369 } 5370 } else { 5371 destsize = 0; 5372 } 5373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5374 } else if (csin_type_simple) { 5375 PetscMPIInt rank; 5376 PetscInt issize,isidx; 5377 5378 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5379 if (im_active) { 5380 issize = 1; 5381 isidx = (PetscInt)rank; 5382 } else { 5383 issize = 0; 5384 isidx = -1; 5385 } 5386 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5387 } else { /* get a suitable subassembling pattern from MATIS code */ 5388 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5389 } 5390 5391 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5392 if (!redist || ncoarse <= void_procs) { 5393 PetscInt ncoarse_cand,tissize,*nisindices; 5394 PetscInt *coarse_candidates; 5395 const PetscInt* tisindices; 5396 5397 /* get coarse candidates' ranks in pc communicator */ 5398 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5399 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5400 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5401 if (!coarse_candidates[i]) { 5402 coarse_candidates[ncoarse_cand++]=i; 5403 } 5404 } 5405 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5406 5407 5408 if (pcbddc->dbg_flag) { 5409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5410 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5411 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5413 for (i=0;i<ncoarse_cand;i++) { 5414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5415 } 5416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5418 } 5419 /* shift the pattern on coarse candidates */ 5420 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5421 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5422 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5423 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5424 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5425 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5426 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5427 } 5428 if (pcbddc->dbg_flag) { 5429 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5431 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5433 } 5434 } 5435 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5436 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5437 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5438 } else { /* this is the last level, so use just receiving processes in subcomm */ 5439 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5440 } 5441 } else { 5442 if (pcbddc->dbg_flag) { 5443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5445 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5446 } 5447 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5448 coarse_mat_is = t_coarse_mat_is; 5449 } 5450 5451 /* create local to global scatters for coarse problem */ 5452 if (compute_vecs) { 5453 PetscInt lrows; 5454 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5455 if (coarse_mat_is) { 5456 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5457 } else { 5458 lrows = 0; 5459 } 5460 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5461 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5462 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5463 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5464 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5465 } 5466 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5467 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5468 5469 /* set defaults for coarse KSP and PC */ 5470 if (multilevel_allowed) { 5471 coarse_ksp_type = KSPRICHARDSON; 5472 coarse_pc_type = PCBDDC; 5473 } else { 5474 coarse_ksp_type = KSPPREONLY; 5475 coarse_pc_type = PCREDUNDANT; 5476 } 5477 5478 /* print some info if requested */ 5479 if (pcbddc->dbg_flag) { 5480 if (!multilevel_allowed) { 5481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5482 if (multilevel_requested) { 5483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5484 } else if (pcbddc->max_levels) { 5485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5486 } 5487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5488 } 5489 } 5490 5491 /* create the coarse KSP object only once with defaults */ 5492 if (coarse_mat_is) { 5493 MatReuse coarse_mat_reuse; 5494 PetscViewer dbg_viewer = NULL; 5495 if (pcbddc->dbg_flag) { 5496 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5497 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5498 } 5499 if (!pcbddc->coarse_ksp) { 5500 char prefix[256],str_level[16]; 5501 size_t len; 5502 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5503 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5504 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5505 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5506 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5507 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5508 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5509 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5510 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5511 /* prefix */ 5512 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5513 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5514 if (!pcbddc->current_level) { 5515 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5516 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5517 } else { 5518 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5519 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5520 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5521 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5522 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5523 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5524 } 5525 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5526 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5527 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5528 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5529 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5530 /* allow user customization */ 5531 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5532 } 5533 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5534 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5535 if (nisdofs) { 5536 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5537 for (i=0;i<nisdofs;i++) { 5538 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5539 } 5540 } 5541 if (nisneu) { 5542 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5543 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5544 } 5545 if (nisvert) { 5546 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5547 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5548 } 5549 5550 /* get some info after set from options */ 5551 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5552 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5553 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5554 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5555 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5556 isbddc = PETSC_FALSE; 5557 } 5558 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5559 if (isredundant) { 5560 KSP inner_ksp; 5561 PC inner_pc; 5562 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5563 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5564 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5565 } 5566 5567 /* assemble coarse matrix */ 5568 if (coarse_reuse) { 5569 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5570 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5571 coarse_mat_reuse = MAT_REUSE_MATRIX; 5572 } else { 5573 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5574 } 5575 if (isbddc || isnn) { 5576 if (isbddc) { /* currently there's no API for this */ 5577 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5578 pcbddc->detect_disconnected = PETSC_TRUE; 5579 } 5580 if (pcbddc->coarsening_ratio > 1) { 5581 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5582 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5583 if (pcbddc->dbg_flag) { 5584 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5585 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5586 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5587 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5588 } 5589 } 5590 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5591 } else { 5592 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5593 coarse_mat = coarse_mat_is; 5594 } 5595 } else { 5596 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5597 } 5598 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5599 5600 /* propagate symmetry info of coarse matrix */ 5601 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5602 if (pc->pmat->symmetric_set) { 5603 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5604 } 5605 if (pc->pmat->hermitian_set) { 5606 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5607 } 5608 if (pc->pmat->spd_set) { 5609 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5610 } 5611 /* set operators */ 5612 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5613 if (pcbddc->dbg_flag) { 5614 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5615 } 5616 } else { /* processes non partecipating to coarse solver (if any) */ 5617 coarse_mat = 0; 5618 } 5619 ierr = PetscFree(isarray);CHKERRQ(ierr); 5620 #if 0 5621 { 5622 PetscViewer viewer; 5623 char filename[256]; 5624 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5625 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5626 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5627 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5628 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5629 } 5630 #endif 5631 5632 /* Compute coarse null space (special handling by BDDC only) */ 5633 #if 0 5634 if (pcbddc->NullSpace) { 5635 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5636 } 5637 #endif 5638 /* hack */ 5639 if (pcbddc->coarse_ksp) { 5640 Vec crhs,csol; 5641 5642 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5643 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5644 if (!csol) { 5645 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5646 } 5647 if (!crhs) { 5648 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5649 } 5650 } 5651 5652 /* compute null space for coarse solver if the benign trick has been requested */ 5653 if (pcbddc->benign_null) { 5654 5655 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5656 for (i=0;i<pcbddc->benign_n;i++) { 5657 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5658 } 5659 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5660 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5661 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5662 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5663 if (coarse_mat) { 5664 Vec nullv; 5665 PetscScalar *array,*array2; 5666 PetscInt nl; 5667 5668 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5669 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5670 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5671 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5672 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5673 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5674 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5675 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5676 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5677 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5678 } 5679 } 5680 5681 if (pcbddc->coarse_ksp) { 5682 PetscBool ispreonly; 5683 5684 if (CoarseNullSpace) { 5685 PetscBool isnull; 5686 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5687 if (0) { 5688 if (isbddc && !pcbddc->benign_saddle_point) { 5689 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5690 } else { 5691 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5692 } 5693 } else { 5694 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5695 } 5696 } 5697 /* setup coarse ksp */ 5698 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5699 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5700 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5701 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5702 KSP check_ksp; 5703 KSPType check_ksp_type; 5704 PC check_pc; 5705 Vec check_vec,coarse_vec; 5706 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5707 PetscInt its; 5708 PetscBool compute_eigs; 5709 PetscReal *eigs_r,*eigs_c; 5710 PetscInt neigs; 5711 const char *prefix; 5712 5713 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5714 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5715 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5716 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5717 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5718 if (ispreonly) { 5719 check_ksp_type = KSPPREONLY; 5720 compute_eigs = PETSC_FALSE; 5721 } else { 5722 check_ksp_type = KSPGMRES; 5723 compute_eigs = PETSC_TRUE; 5724 } 5725 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5726 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5727 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5728 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5729 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5730 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5731 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5732 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5733 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5734 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5735 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5736 /* create random vec */ 5737 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5738 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5739 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5740 if (CoarseNullSpace) { 5741 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5742 } 5743 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5744 /* solve coarse problem */ 5745 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5746 if (CoarseNullSpace) { 5747 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5748 } 5749 /* set eigenvalue estimation if preonly has not been requested */ 5750 if (compute_eigs) { 5751 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5752 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5753 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5754 lambda_max = eigs_r[neigs-1]; 5755 lambda_min = eigs_r[0]; 5756 if (pcbddc->use_coarse_estimates) { 5757 if (lambda_max>lambda_min) { 5758 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5759 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5760 } 5761 } 5762 } 5763 5764 /* check coarse problem residual error */ 5765 if (pcbddc->dbg_flag) { 5766 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5767 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5768 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5769 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5770 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5771 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5772 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5773 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5774 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5775 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5776 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5777 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5778 if (CoarseNullSpace) { 5779 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5780 } 5781 if (compute_eigs) { 5782 PetscReal lambda_max_s,lambda_min_s; 5783 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5784 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5785 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5786 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5787 for (i=0;i<neigs;i++) { 5788 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5789 } 5790 } 5791 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5792 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5793 } 5794 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5795 if (compute_eigs) { 5796 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5797 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5798 } 5799 } 5800 } 5801 /* print additional info */ 5802 if (pcbddc->dbg_flag) { 5803 /* waits until all processes reaches this point */ 5804 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5805 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5806 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5807 } 5808 5809 /* free memory */ 5810 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5811 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5812 PetscFunctionReturn(0); 5813 } 5814 5815 #undef __FUNCT__ 5816 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5817 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5818 { 5819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5820 PC_IS* pcis = (PC_IS*)pc->data; 5821 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5822 IS subset,subset_mult,subset_n; 5823 PetscInt local_size,coarse_size=0; 5824 PetscInt *local_primal_indices=NULL; 5825 const PetscInt *t_local_primal_indices; 5826 PetscErrorCode ierr; 5827 5828 PetscFunctionBegin; 5829 /* Compute global number of coarse dofs */ 5830 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5831 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5832 } 5833 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5834 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5835 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5836 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5837 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5838 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5839 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5840 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5841 if (local_size != pcbddc->local_primal_size) { 5842 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5843 } 5844 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5845 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5846 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5847 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5848 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5849 5850 /* check numbering */ 5851 if (pcbddc->dbg_flag) { 5852 PetscScalar coarsesum,*array,*array2; 5853 PetscInt i; 5854 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5855 5856 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5858 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5859 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5860 /* counter */ 5861 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5862 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5863 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5864 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5865 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5866 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5867 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5868 for (i=0;i<pcbddc->local_primal_size;i++) { 5869 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5870 } 5871 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5872 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5873 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5874 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5875 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5876 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5877 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5878 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5879 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5880 for (i=0;i<pcis->n;i++) { 5881 if (array[i] != 0.0 && array[i] != array2[i]) { 5882 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5883 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5884 set_error = PETSC_TRUE; 5885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5886 } 5887 } 5888 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5889 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5890 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5891 for (i=0;i<pcis->n;i++) { 5892 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5893 } 5894 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5895 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5896 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5897 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5898 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5900 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5901 PetscInt *gidxs; 5902 5903 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5904 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5905 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5907 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5908 for (i=0;i<pcbddc->local_primal_size;i++) { 5909 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5910 } 5911 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5912 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5913 } 5914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5915 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5916 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5917 } 5918 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5919 /* get back data */ 5920 *coarse_size_n = coarse_size; 5921 *local_primal_indices_n = local_primal_indices; 5922 PetscFunctionReturn(0); 5923 } 5924 5925 #undef __FUNCT__ 5926 #define __FUNCT__ "PCBDDCGlobalToLocal" 5927 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5928 { 5929 IS localis_t; 5930 PetscInt i,lsize,*idxs,n; 5931 PetscScalar *vals; 5932 PetscErrorCode ierr; 5933 5934 PetscFunctionBegin; 5935 /* get indices in local ordering exploiting local to global map */ 5936 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5937 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5938 for (i=0;i<lsize;i++) vals[i] = 1.0; 5939 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5940 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5941 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5942 if (idxs) { /* multilevel guard */ 5943 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5944 } 5945 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5946 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5947 ierr = PetscFree(vals);CHKERRQ(ierr); 5948 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5949 /* now compute set in local ordering */ 5950 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5951 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5952 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5953 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5954 for (i=0,lsize=0;i<n;i++) { 5955 if (PetscRealPart(vals[i]) > 0.5) { 5956 lsize++; 5957 } 5958 } 5959 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5960 for (i=0,lsize=0;i<n;i++) { 5961 if (PetscRealPart(vals[i]) > 0.5) { 5962 idxs[lsize++] = i; 5963 } 5964 } 5965 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5966 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5967 *localis = localis_t; 5968 PetscFunctionReturn(0); 5969 } 5970 5971 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5972 #undef __FUNCT__ 5973 #define __FUNCT__ "PCBDDCMatMult_Private" 5974 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5975 { 5976 PCBDDCChange_ctx change_ctx; 5977 PetscErrorCode ierr; 5978 5979 PetscFunctionBegin; 5980 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5981 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5982 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5983 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5984 PetscFunctionReturn(0); 5985 } 5986 5987 #undef __FUNCT__ 5988 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5989 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5990 { 5991 PCBDDCChange_ctx change_ctx; 5992 PetscErrorCode ierr; 5993 5994 PetscFunctionBegin; 5995 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5996 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5997 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5998 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5999 PetscFunctionReturn(0); 6000 } 6001 6002 #undef __FUNCT__ 6003 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6004 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6005 { 6006 PC_IS *pcis=(PC_IS*)pc->data; 6007 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6008 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6009 Mat S_j; 6010 PetscInt *used_xadj,*used_adjncy; 6011 PetscBool free_used_adj; 6012 PetscErrorCode ierr; 6013 6014 PetscFunctionBegin; 6015 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6016 free_used_adj = PETSC_FALSE; 6017 if (pcbddc->sub_schurs_layers == -1) { 6018 used_xadj = NULL; 6019 used_adjncy = NULL; 6020 } else { 6021 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6022 used_xadj = pcbddc->mat_graph->xadj; 6023 used_adjncy = pcbddc->mat_graph->adjncy; 6024 } else if (pcbddc->computed_rowadj) { 6025 used_xadj = pcbddc->mat_graph->xadj; 6026 used_adjncy = pcbddc->mat_graph->adjncy; 6027 } else { 6028 PetscBool flg_row=PETSC_FALSE; 6029 const PetscInt *xadj,*adjncy; 6030 PetscInt nvtxs; 6031 6032 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6033 if (flg_row) { 6034 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6035 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6036 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6037 free_used_adj = PETSC_TRUE; 6038 } else { 6039 pcbddc->sub_schurs_layers = -1; 6040 used_xadj = NULL; 6041 used_adjncy = NULL; 6042 } 6043 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6044 } 6045 } 6046 6047 /* setup sub_schurs data */ 6048 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6049 if (!sub_schurs->schur_explicit) { 6050 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6051 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6052 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6053 } else { 6054 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6055 PetscBool isseqaij; 6056 PetscInt benign_n; 6057 6058 if (!pcbddc->use_vertices && reuse_solvers) { 6059 PetscInt n_vertices; 6060 6061 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6062 reuse_solvers = (PetscBool)!n_vertices; 6063 } 6064 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6065 if (!isseqaij) { 6066 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6067 if (matis->A == pcbddc->local_mat) { 6068 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6069 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6070 } else { 6071 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6072 } 6073 } 6074 if (!pcbddc->benign_change_explicit) { 6075 benign_n = pcbddc->benign_n; 6076 } else { 6077 benign_n = 0; 6078 } 6079 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6080 } 6081 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6082 6083 /* free adjacency */ 6084 if (free_used_adj) { 6085 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6086 } 6087 PetscFunctionReturn(0); 6088 } 6089 6090 #undef __FUNCT__ 6091 #define __FUNCT__ "PCBDDCInitSubSchurs" 6092 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6093 { 6094 PC_IS *pcis=(PC_IS*)pc->data; 6095 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6096 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6097 PCBDDCGraph graph; 6098 PetscErrorCode ierr; 6099 6100 PetscFunctionBegin; 6101 /* attach interface graph for determining subsets */ 6102 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6103 IS verticesIS,verticescomm; 6104 PetscInt vsize,*idxs; 6105 6106 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6107 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6108 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6109 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6110 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6111 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6112 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6113 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6114 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6115 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6116 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6117 /* 6118 if (pcbddc->dbg_flag) { 6119 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6120 } 6121 */ 6122 } else { 6123 graph = pcbddc->mat_graph; 6124 } 6125 6126 /* sub_schurs init */ 6127 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6128 6129 /* free graph struct */ 6130 if (pcbddc->sub_schurs_rebuild) { 6131 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6132 } 6133 PetscFunctionReturn(0); 6134 } 6135 6136 #undef __FUNCT__ 6137 #define __FUNCT__ "PCBDDCCheckOperator" 6138 PetscErrorCode PCBDDCCheckOperator(PC pc) 6139 { 6140 PC_IS *pcis=(PC_IS*)pc->data; 6141 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6142 PetscErrorCode ierr; 6143 6144 PetscFunctionBegin; 6145 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6146 IS zerodiag = NULL; 6147 Mat S_j,B0_B=NULL; 6148 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6149 PetscScalar *p0_check,*array,*array2; 6150 PetscReal norm; 6151 PetscInt i; 6152 6153 /* B0 and B0_B */ 6154 if (zerodiag) { 6155 IS dummy; 6156 6157 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6158 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6159 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6160 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6161 } 6162 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6163 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6164 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6165 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6166 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6167 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6168 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6169 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6170 /* S_j */ 6171 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6172 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6173 6174 /* mimic vector in \widetilde{W}_\Gamma */ 6175 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6176 /* continuous in primal space */ 6177 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6178 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6179 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6180 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6181 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6182 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6183 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6184 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6185 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6186 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6187 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6188 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6189 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6190 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6191 6192 /* assemble rhs for coarse problem */ 6193 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6194 /* local with Schur */ 6195 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6196 if (zerodiag) { 6197 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6198 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6199 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6200 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6201 } 6202 /* sum on primal nodes the local contributions */ 6203 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6204 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6205 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6206 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6207 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6208 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6209 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6210 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6211 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6212 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6213 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6214 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6215 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6216 /* scale primal nodes (BDDC sums contibutions) */ 6217 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6218 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6219 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6220 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6221 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6222 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6223 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6224 /* global: \widetilde{B0}_B w_\Gamma */ 6225 if (zerodiag) { 6226 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6227 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6228 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6229 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6230 } 6231 /* BDDC */ 6232 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6233 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6234 6235 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6236 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6237 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6238 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6239 for (i=0;i<pcbddc->benign_n;i++) { 6240 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6241 } 6242 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6243 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6244 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6245 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6246 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6247 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6248 } 6249 PetscFunctionReturn(0); 6250 } 6251