1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 if (!A->rmap->N || !A->cmap->N) { 290 *ncc = 0; 291 *cc = NULL; 292 PetscFunctionReturn(0); 293 } 294 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 295 if (!isseqaij && filter) { 296 PetscBool isseqdense; 297 298 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 299 if (!isseqdense) { 300 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 301 } else { /* TODO: rectangular case and LDA */ 302 PetscScalar *array; 303 PetscReal chop=1.e-6; 304 305 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 306 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 307 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 308 for (i=0;i<n;i++) { 309 PetscInt j; 310 for (j=i+1;j<n;j++) { 311 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 312 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 313 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 314 } 315 } 316 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 317 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 318 } 319 } else { 320 B = A; 321 } 322 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 323 324 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 325 if (filter) { 326 PetscScalar *data; 327 PetscInt j,cum; 328 329 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 330 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 331 cum = 0; 332 for (i=0;i<n;i++) { 333 PetscInt t; 334 335 for (j=xadj[i];j<xadj[i+1];j++) { 336 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 337 continue; 338 } 339 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 340 } 341 t = xadj_filtered[i]; 342 xadj_filtered[i] = cum; 343 cum += t; 344 } 345 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 346 } else { 347 xadj_filtered = NULL; 348 adjncy_filtered = NULL; 349 } 350 351 /* compute local connected components using PCBDDCGraph */ 352 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 353 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 354 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 355 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 356 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 358 if (xadj_filtered) { 359 graph->xadj = xadj_filtered; 360 graph->adjncy = adjncy_filtered; 361 } else { 362 graph->xadj = xadj; 363 graph->adjncy = adjncy; 364 } 365 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 367 /* partial clean up */ 368 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 369 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 370 if (A != B) { 371 ierr = MatDestroy(&B);CHKERRQ(ierr); 372 } 373 374 /* get back data */ 375 if (ncc) *ncc = graph->ncc; 376 if (cc) { 377 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 378 for (i=0;i<graph->ncc;i++) { 379 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 380 } 381 *cc = cc_n; 382 } 383 /* clean up graph */ 384 graph->xadj = 0; 385 graph->adjncy = 0; 386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "PCBDDCBenignCheck" 392 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 393 { 394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 395 PC_IS* pcis = (PC_IS*)(pc->data); 396 IS dirIS = NULL; 397 PetscInt i; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 402 if (zerodiag) { 403 Mat A; 404 Vec vec3_N; 405 PetscScalar *vals; 406 const PetscInt *idxs; 407 PetscInt nz,*count; 408 409 /* p0 */ 410 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 411 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 412 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 413 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 414 for (i=0;i<nz;i++) vals[i] = 1.; 415 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* v_I */ 419 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 420 for (i=0;i<nz;i++) vals[i] = 0.; 421 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 423 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 424 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 425 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 427 if (dirIS) { 428 PetscInt n; 429 430 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 431 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 432 for (i=0;i<n;i++) vals[i] = 0.; 433 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 438 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 439 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 440 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 441 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 442 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 443 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 444 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 445 } 446 ierr = PetscFree(vals);CHKERRQ(ierr); 447 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 448 449 /* there should not be any pressure dofs lying on the interface */ 450 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 451 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 453 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 454 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 455 for (i=0;i<nz;i++) { 456 if (count[idxs[i]]) { 457 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 458 } 459 } 460 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 461 ierr = PetscFree(count);CHKERRQ(ierr); 462 } 463 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 464 465 /* check PCBDDCBenignGetOrSetP0 */ 466 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 470 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 471 for (i=0;i<pcbddc->benign_n;i++) { 472 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 473 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 474 } 475 } 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 481 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 482 { 483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 484 IS pressures,zerodiag,*zerodiag_subs; 485 PetscInt nz,n; 486 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 492 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 493 for (n=0;n<pcbddc->benign_n;n++) { 494 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 495 } 496 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 497 pcbddc->benign_n = 0; 498 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 499 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 500 Checks if all the pressure dofs in each subdomain have a zero diagonal 501 If not, a change of basis on pressures is not needed 502 since the local Schur complements are SPD 503 */ 504 has_null_pressures = PETSC_TRUE; 505 have_null = PETSC_TRUE; 506 if (pcbddc->n_ISForDofsLocal) { 507 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 508 509 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 510 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 511 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 514 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 515 if (!sorted) { 516 ierr = ISSort(pressures);CHKERRQ(ierr); 517 } 518 } else { 519 pressures = NULL; 520 } 521 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 522 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 523 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 524 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 525 if (!sorted) { 526 ierr = ISSort(zerodiag);CHKERRQ(ierr); 527 } 528 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 529 if (!nz) { 530 if (n) have_null = PETSC_FALSE; 531 has_null_pressures = PETSC_FALSE; 532 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 533 } 534 recompute_zerodiag = PETSC_FALSE; 535 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 536 zerodiag_subs = NULL; 537 pcbddc->benign_n = 0; 538 if (has_null_pressures) { 539 IS *subs; 540 PetscInt nsubs,i; 541 542 subs = pcbddc->local_subs; 543 nsubs = pcbddc->n_local_subs; 544 if (nsubs > 1) { 545 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 546 for (i=0;i<nsubs;i++) { 547 ISLocalToGlobalMapping l2g; 548 IS t_zerodiag_subs; 549 PetscInt nl; 550 551 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 552 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 553 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 554 if (nl) { 555 PetscBool valid = PETSC_TRUE; 556 557 if (pressures) { 558 IS t_pressure_subs; 559 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 560 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 561 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 562 } 563 if (valid) { 564 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 565 pcbddc->benign_n++; 566 } else { 567 recompute_zerodiag = PETSC_TRUE; 568 } 569 } 570 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 571 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 572 } 573 } else { /* there's just one subdomain (or zero if they have not been detected */ 574 PetscBool valid = PETSC_TRUE; 575 if (pressures) { 576 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 577 } 578 if (valid) { 579 pcbddc->benign_n = 1; 580 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 581 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 582 zerodiag_subs[0] = zerodiag; 583 } 584 } 585 } 586 587 if (!pcbddc->benign_n) { 588 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 589 recompute_zerodiag = PETSC_FALSE; 590 has_null_pressures = PETSC_FALSE; 591 have_null = PETSC_FALSE; 592 } 593 594 /* final check for null pressures */ 595 if (zerodiag && pressures) { 596 PetscInt nz,np; 597 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 598 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 599 if (nz != np) have_null = PETSC_FALSE; 600 } 601 602 if (recompute_zerodiag) { 603 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 604 if (pcbddc->benign_n == 1) { 605 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 606 zerodiag = zerodiag_subs[0]; 607 } else { 608 PetscInt i,nzn,*new_idxs; 609 610 nzn = 0; 611 for (i=0;i<pcbddc->benign_n;i++) { 612 PetscInt ns; 613 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 614 nzn += ns; 615 } 616 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 617 nzn = 0; 618 for (i=0;i<pcbddc->benign_n;i++) { 619 PetscInt ns,*idxs; 620 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 622 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 623 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 624 nzn += ns; 625 } 626 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 627 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 628 } 629 have_null = PETSC_FALSE; 630 } 631 632 if (has_null_pressures) { 633 IS zerodiagc; 634 const PetscInt *idxs,*idxsc; 635 PetscInt i,s,*nnz; 636 637 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 638 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 639 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 640 /* local change of basis for pressures */ 641 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 642 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 643 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 644 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 645 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 646 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 647 for (i=0;i<pcbddc->benign_n;i++) { 648 PetscInt nzs,j; 649 650 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 651 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 652 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 653 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 654 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 655 } 656 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 657 ierr = PetscFree(nnz);CHKERRQ(ierr); 658 /* set identity on velocities */ 659 for (i=0;i<n-nz;i++) { 660 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 661 } 662 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 663 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 664 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 665 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 666 /* set change on pressures */ 667 for (s=0;s<pcbddc->benign_n;s++) { 668 PetscScalar *array; 669 PetscInt nzs; 670 671 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 672 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 673 for (i=0;i<nzs-1;i++) { 674 PetscScalar vals[2]; 675 PetscInt cols[2]; 676 677 cols[0] = idxs[i]; 678 cols[1] = idxs[nzs-1]; 679 vals[0] = 1.; 680 vals[1] = 1.; 681 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 682 } 683 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 684 for (i=0;i<nzs-1;i++) array[i] = -1.; 685 array[nzs-1] = 1.; 686 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 687 /* store local idxs for p0 */ 688 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 689 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 ierr = PetscFree(array);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 /* project if needed */ 695 if (pcbddc->benign_change_explicit) { 696 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 700 } 701 /* store global idxs for p0 */ 702 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 703 } 704 pcbddc->benign_zerodiag_subs = zerodiag_subs; 705 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 706 707 /* determines if the coarse solver will be singular or not */ 708 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 709 *zerodiaglocal = zerodiag; 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 715 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 716 { 717 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (!pcbddc->benign_sf) { 722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 723 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 724 } 725 if (get) { /* use SF to get values */ 726 PetscScalar *array; 727 728 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 729 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 730 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 731 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 732 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 733 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 734 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 735 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 742 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 743 { 744 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 /* TODO: add error checking 749 - avoid nested pop (or push) calls. 750 - cannot push before pop. 751 - cannot call this if pcbddc->local_mat is NULL 752 */ 753 if (!pcbddc->benign_n) { 754 PetscFunctionReturn(0); 755 } 756 if (pop) { 757 if (pcbddc->benign_change_explicit) { 758 IS is_p0; 759 MatReuse reuse; 760 761 /* extract B_0 */ 762 reuse = MAT_INITIAL_MATRIX; 763 if (pcbddc->benign_B0) { 764 reuse = MAT_REUSE_MATRIX; 765 } 766 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 767 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 768 /* remove rows and cols from local problem */ 769 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 770 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 771 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 772 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 773 } else { 774 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 775 PetscScalar *vals; 776 PetscInt i,n,*idxs_ins; 777 778 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 779 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 780 if (!pcbddc->benign_B0) { 781 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 782 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 783 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 784 if (!pcbddc->current_level) { 785 /* this matrix is actually sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 786 Setting nnz=100 should be more than enough for the finest level */ 787 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 788 } else { 789 PetscInt *nnz; 790 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 791 for (i=0;i<pcbddc->benign_n;i++) { 792 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 793 nnz[i] = n - nnz[i]; 794 } 795 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 796 ierr = PetscFree(nnz);CHKERRQ(ierr); 797 } 798 } 799 800 for (i=0;i<pcbddc->benign_n;i++) { 801 PetscScalar *array; 802 PetscInt *idxs,j,nz,cum; 803 804 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 805 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 806 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 807 for (j=0;j<nz;j++) vals[j] = 1.; 808 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 811 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 812 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 813 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 814 cum = 0; 815 for (j=0;j<n;j++) { 816 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 817 vals[cum] = array[j]; 818 idxs_ins[cum] = j; 819 cum++; 820 } 821 } 822 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 824 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 825 } 826 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 829 } 830 } else { /* push */ 831 if (pcbddc->benign_change_explicit) { 832 PetscInt i; 833 834 for (i=0;i<pcbddc->benign_n;i++) { 835 PetscScalar *B0_vals; 836 PetscInt *B0_cols,B0_ncol; 837 838 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 839 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 840 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 841 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 842 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 843 } 844 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 845 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 } else { 847 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 848 } 849 } 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "PCBDDCAdaptiveSelection" 855 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 856 { 857 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 858 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 859 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 860 PetscBLASInt *B_iwork,*B_ifail; 861 PetscScalar *work,lwork; 862 PetscScalar *St,*S,*eigv; 863 PetscScalar *Sarray,*Starray; 864 PetscReal *eigs,thresh; 865 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 866 PetscBool allocated_S_St; 867 #if defined(PETSC_USE_COMPLEX) 868 PetscReal *rwork; 869 #endif 870 PetscErrorCode ierr; 871 872 PetscFunctionBegin; 873 if (!sub_schurs->schur_explicit) { 874 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 875 } 876 877 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 878 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 879 } 880 881 if (pcbddc->dbg_flag) { 882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 884 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 885 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 886 } 887 888 if (pcbddc->dbg_flag) { 889 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 /* max size of subsets */ 893 mss = 0; 894 for (i=0;i<sub_schurs->n_subs;i++) { 895 PetscInt subset_size; 896 897 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 898 mss = PetscMax(mss,subset_size); 899 } 900 901 /* min/max and threshold */ 902 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 903 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 904 nmax = PetscMax(nmin,nmax); 905 allocated_S_St = PETSC_FALSE; 906 if (nmin) { 907 allocated_S_St = PETSC_TRUE; 908 } 909 910 /* allocate lapack workspace */ 911 cum = cum2 = 0; 912 maxneigs = 0; 913 for (i=0;i<sub_schurs->n_subs;i++) { 914 PetscInt n,subset_size; 915 916 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 917 n = PetscMin(subset_size,nmax); 918 cum += subset_size; 919 cum2 += subset_size*n; 920 maxneigs = PetscMax(maxneigs,n); 921 } 922 if (mss) { 923 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 924 PetscBLASInt B_itype = 1; 925 PetscBLASInt B_N = mss; 926 PetscReal zero = 0.0; 927 PetscReal eps = 0.0; /* dlamch? */ 928 929 B_lwork = -1; 930 S = NULL; 931 St = NULL; 932 eigs = NULL; 933 eigv = NULL; 934 B_iwork = NULL; 935 B_ifail = NULL; 936 #if defined(PETSC_USE_COMPLEX) 937 rwork = NULL; 938 #endif 939 thresh = 1.0; 940 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 941 #if defined(PETSC_USE_COMPLEX) 942 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 943 #else 944 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 945 #endif 946 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 947 ierr = PetscFPTrapPop();CHKERRQ(ierr); 948 } else { 949 /* TODO */ 950 } 951 } else { 952 lwork = 0; 953 } 954 955 nv = 0; 956 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 957 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 958 } 959 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 960 if (allocated_S_St) { 961 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 962 } 963 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 964 #if defined(PETSC_USE_COMPLEX) 965 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 966 #endif 967 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 968 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 969 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 970 nv+cum,&pcbddc->adaptive_constraints_idxs, 971 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 972 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 973 974 maxneigs = 0; 975 cum = cum2 = cumarray = 0; 976 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 977 pcbddc->adaptive_constraints_data_ptr[0] = 0; 978 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 979 const PetscInt *idxs; 980 981 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 982 for (cum=0;cum<nv;cum++) { 983 pcbddc->adaptive_constraints_n[cum] = 1; 984 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 985 pcbddc->adaptive_constraints_data[cum] = 1.0; 986 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 987 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 988 } 989 cum2 = cum; 990 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 991 } 992 993 if (mss) { /* multilevel */ 994 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 995 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 996 } 997 998 thresh = pcbddc->adaptive_threshold; 999 for (i=0;i<sub_schurs->n_subs;i++) { 1000 const PetscInt *idxs; 1001 PetscReal infty = PETSC_MAX_REAL; 1002 PetscInt j,subset_size,eigs_start = 0; 1003 PetscBLASInt B_N; 1004 PetscBool same_data = PETSC_FALSE; 1005 1006 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1007 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1008 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1009 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1010 if (sub_schurs->is_hermitian) { 1011 PetscInt j,k; 1012 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1013 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1014 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 for (j=0;j<subset_size;j++) { 1017 for (k=j;k<subset_size;k++) { 1018 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1019 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1020 } 1021 } 1022 } else { 1023 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1024 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 } 1026 } else { 1027 S = Sarray + cumarray; 1028 St = Starray + cumarray; 1029 } 1030 1031 /* see if we can save some work */ 1032 if (sub_schurs->n_subs == 1) { 1033 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1034 } 1035 1036 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1037 B_neigs = 0; 1038 } else { 1039 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1040 PetscBLASInt B_itype = 1; 1041 PetscBLASInt B_IL, B_IU; 1042 PetscReal eps = -1.0; /* dlamch? */ 1043 PetscInt nmin_s; 1044 1045 if (pcbddc->dbg_flag) { 1046 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1047 } 1048 1049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1050 if (thresh > 1.+PETSC_SMALL) { 1051 1052 /* ask for eigenvalues larger than thresh */ 1053 #if defined(PETSC_USE_COMPLEX) 1054 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1055 #else 1056 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1057 #endif 1058 } else { 1059 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1060 B_IL = 1; 1061 #if defined(PETSC_USE_COMPLEX) 1062 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1063 #else 1064 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1065 #endif 1066 } 1067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1068 if (B_ierr) { 1069 if (B_ierr < 0 ) { 1070 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1071 } else if (B_ierr <= B_N) { 1072 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1073 } else { 1074 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1075 } 1076 } 1077 1078 if (B_neigs > nmax) { 1079 if (pcbddc->dbg_flag) { 1080 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1081 } 1082 eigs_start = B_neigs -nmax; 1083 B_neigs = nmax; 1084 } 1085 1086 nmin_s = PetscMin(nmin,B_N); 1087 if (B_neigs < nmin_s) { 1088 PetscBLASInt B_neigs2; 1089 1090 B_IU = B_N - B_neigs; 1091 B_IL = B_N - nmin_s + 1; 1092 if (pcbddc->dbg_flag) { 1093 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1094 } 1095 if (sub_schurs->is_hermitian) { 1096 PetscInt j; 1097 for (j=0;j<subset_size;j++) { 1098 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1099 } 1100 for (j=0;j<subset_size;j++) { 1101 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1102 } 1103 } else { 1104 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1105 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1106 } 1107 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1108 #if defined(PETSC_USE_COMPLEX) 1109 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1110 #else 1111 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1112 #endif 1113 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1114 B_neigs += B_neigs2; 1115 } 1116 if (B_ierr) { 1117 if (B_ierr < 0 ) { 1118 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1119 } else if (B_ierr <= B_N) { 1120 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1121 } else { 1122 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1123 } 1124 } 1125 if (pcbddc->dbg_flag) { 1126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1127 for (j=0;j<B_neigs;j++) { 1128 if (eigs[j] == 0.0) { 1129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1130 } else { 1131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1132 } 1133 } 1134 } 1135 } else { 1136 /* TODO */ 1137 } 1138 } 1139 maxneigs = PetscMax(B_neigs,maxneigs); 1140 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1141 if (B_neigs) { 1142 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1143 1144 if (pcbddc->dbg_flag > 1) { 1145 PetscInt ii; 1146 for (ii=0;ii<B_neigs;ii++) { 1147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1148 for (j=0;j<B_N;j++) { 1149 #if defined(PETSC_USE_COMPLEX) 1150 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1151 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1153 #else 1154 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1155 #endif 1156 } 1157 } 1158 } 1159 #if 0 1160 for (j=0;j<B_neigs;j++) { 1161 PetscBLASInt Blas_N,Blas_one = 1.0; 1162 PetscScalar norm; 1163 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1164 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1165 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1166 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1167 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1168 } else { 1169 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1170 } 1171 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1172 } 1173 #endif 1174 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1175 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1176 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1177 cum++; 1178 } 1179 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1180 /* shift for next computation */ 1181 cumarray += subset_size*subset_size; 1182 } 1183 if (pcbddc->dbg_flag) { 1184 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1185 } 1186 1187 if (mss) { 1188 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1189 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1190 /* destroy matrices (junk) */ 1191 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1192 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1193 } 1194 if (allocated_S_St) { 1195 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1196 } 1197 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1198 #if defined(PETSC_USE_COMPLEX) 1199 ierr = PetscFree(rwork);CHKERRQ(ierr); 1200 #endif 1201 if (pcbddc->dbg_flag) { 1202 PetscInt maxneigs_r; 1203 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "PCBDDCSetUpSolvers" 1211 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1212 { 1213 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1214 PetscScalar *coarse_submat_vals; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1219 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1220 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1221 1222 /* Setup local neumann solver ksp_R */ 1223 /* PCBDDCSetUpLocalScatters should be called first! */ 1224 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1225 1226 /* Change global null space passed in by the user if change of basis has been requested */ 1227 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1228 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1229 } 1230 1231 /* 1232 Setup local correction and local part of coarse basis. 1233 Gives back the dense local part of the coarse matrix in column major ordering 1234 */ 1235 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1236 1237 /* Compute total number of coarse nodes and setup coarse solver */ 1238 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1239 1240 /* free */ 1241 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "PCBDDCResetCustomization" 1247 PetscErrorCode PCBDDCResetCustomization(PC pc) 1248 { 1249 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1254 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1255 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1256 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1257 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1258 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1259 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1260 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1261 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1262 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1263 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1264 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "PCBDDCResetTopography" 1270 PetscErrorCode PCBDDCResetTopography(PC pc) 1271 { 1272 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1273 PetscInt i; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1278 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1279 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1280 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1281 for (i=0;i<pcbddc->n_local_subs;i++) { 1282 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1283 } 1284 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1285 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCResetSolvers" 1291 PetscErrorCode PCBDDCResetSolvers(PC pc) 1292 { 1293 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1298 if (pcbddc->coarse_phi_B) { 1299 PetscScalar *array; 1300 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1301 ierr = PetscFree(array);CHKERRQ(ierr); 1302 } 1303 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1304 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1305 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1306 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1307 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1308 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1309 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1310 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1311 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1312 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1313 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1314 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1315 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1316 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1317 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1318 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1319 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1321 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1322 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1323 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1325 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1326 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1327 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1328 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1329 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1330 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1331 if (pcbddc->benign_zerodiag_subs) { 1332 PetscInt i; 1333 for (i=0;i<pcbddc->benign_n;i++) { 1334 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1337 } 1338 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1344 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1345 { 1346 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1347 PC_IS *pcis = (PC_IS*)pc->data; 1348 VecType impVecType; 1349 PetscInt n_constraints,n_R,old_size; 1350 PetscErrorCode ierr; 1351 1352 PetscFunctionBegin; 1353 if (!pcbddc->ConstraintMatrix) { 1354 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1355 } 1356 /* get sizes */ 1357 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1358 n_R = pcis->n - pcbddc->n_vertices; 1359 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1360 /* local work vectors (try to avoid unneeded work)*/ 1361 /* R nodes */ 1362 old_size = -1; 1363 if (pcbddc->vec1_R) { 1364 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1365 } 1366 if (n_R != old_size) { 1367 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1368 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1369 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1370 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1371 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1372 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1373 } 1374 /* local primal dofs */ 1375 old_size = -1; 1376 if (pcbddc->vec1_P) { 1377 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1378 } 1379 if (pcbddc->local_primal_size != old_size) { 1380 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1381 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1382 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1383 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1384 } 1385 /* local explicit constraints */ 1386 old_size = -1; 1387 if (pcbddc->vec1_C) { 1388 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1389 } 1390 if (n_constraints && n_constraints != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "PCBDDCSetUpCorrection" 1401 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1402 { 1403 PetscErrorCode ierr; 1404 /* pointers to pcis and pcbddc */ 1405 PC_IS* pcis = (PC_IS*)pc->data; 1406 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1407 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1408 /* submatrices of local problem */ 1409 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1410 /* submatrices of benign trick */ 1411 Mat B0_V = NULL; 1412 /* submatrices of local coarse problem */ 1413 Mat S_VV,S_CV,S_VC,S_CC; 1414 /* working matrices */ 1415 Mat C_CR; 1416 /* additional working stuff */ 1417 PC pc_R; 1418 Mat F; 1419 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1420 1421 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1422 PetscScalar *work; 1423 PetscInt *idx_V_B; 1424 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1425 PetscInt i,n_R,n_D,n_B; 1426 1427 /* some shortcuts to scalars */ 1428 PetscScalar one=1.0,m_one=-1.0; 1429 1430 PetscFunctionBegin; 1431 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1432 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1433 } 1434 1435 /* Set Non-overlapping dimensions */ 1436 n_vertices = pcbddc->n_vertices; 1437 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1438 n_B = pcis->n_B; 1439 n_D = pcis->n - n_B; 1440 n_R = pcis->n - n_vertices; 1441 1442 /* vertices in boundary numbering */ 1443 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1444 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1445 if (i != n_vertices) { 1446 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1447 } 1448 1449 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1450 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1452 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1454 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1455 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1456 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1458 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1459 1460 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1461 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1464 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1465 lda_rhs = n_R; 1466 need_benign_correction = PETSC_FALSE; 1467 if (isLU || isILU || isCHOL) { 1468 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1469 } else if (sub_schurs->reuse_solver) { 1470 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1471 MatFactorType type; 1472 1473 F = reuse_solver->F; 1474 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1475 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1476 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1477 need_benign_correction = !!reuse_solver->benign_n; 1478 } else { 1479 F = NULL; 1480 } 1481 1482 /* allocate workspace */ 1483 n = 0; 1484 if (n_constraints) { 1485 n += lda_rhs*n_constraints; 1486 } 1487 if (n_vertices) { 1488 n = PetscMax(2*lda_rhs*n_vertices,n); 1489 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1490 } 1491 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1492 1493 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1494 if (n_constraints) { 1495 Mat M1,M2,M3; 1496 Mat auxmat; 1497 IS is_aux; 1498 PetscScalar *array,*array2; 1499 1500 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1501 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1502 1503 /* Extract constraints on R nodes: C_{CR} */ 1504 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1505 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1506 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1507 1508 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1509 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1510 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1511 for (i=0;i<n_constraints;i++) { 1512 const PetscScalar *row_cmat_values; 1513 const PetscInt *row_cmat_indices; 1514 PetscInt size_of_constraint,j; 1515 1516 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1517 for (j=0;j<size_of_constraint;j++) { 1518 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1519 } 1520 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1521 } 1522 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1523 if (F) { 1524 Mat B; 1525 1526 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1527 if (need_benign_correction) { 1528 PetscScalar *marr; 1529 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1530 1531 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1532 for (i=0;i<n_constraints;i++) { 1533 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1534 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1535 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1536 } 1537 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1538 } 1539 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1540 if (need_benign_correction) { 1541 PetscScalar *marr; 1542 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1543 1544 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1545 for (i=0;i<n_constraints;i++) { 1546 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1547 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1548 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1549 } 1550 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1551 } 1552 ierr = MatDestroy(&B);CHKERRQ(ierr); 1553 } else { 1554 PetscScalar *marr; 1555 1556 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1557 for (i=0;i<n_constraints;i++) { 1558 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1559 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1560 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1562 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1563 } 1564 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1565 } 1566 if (!pcbddc->switch_static) { 1567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1568 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1569 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1570 for (i=0;i<n_constraints;i++) { 1571 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1572 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1573 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1575 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1576 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1577 } 1578 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1579 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1581 } else { 1582 if (lda_rhs != n_R) { 1583 IS dummy; 1584 1585 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1586 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1587 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1590 pcbddc->local_auxmat2 = local_auxmat2_R; 1591 } 1592 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1593 } 1594 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1595 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1596 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1597 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1598 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1599 if (isCHOL) { 1600 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1601 } else { 1602 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1603 } 1604 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1605 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1606 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1607 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1608 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1609 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1610 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1611 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1612 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1613 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1614 } 1615 /* Get submatrices from subdomain matrix */ 1616 if (pcbddc->benign_n) { 1617 IS dummy; 1618 Mat B0_R; 1619 PetscReal norm; 1620 1621 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1622 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1623 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1624 if (norm > PETSC_SMALL) { 1625 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1626 } 1627 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1628 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1629 } 1630 1631 if (n_vertices) { 1632 IS is_aux; 1633 1634 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1635 IS tis; 1636 1637 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1638 ierr = ISSort(tis);CHKERRQ(ierr); 1639 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1640 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1641 } else { 1642 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1643 } 1644 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1645 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1646 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1647 if (pcbddc->benign_n) { 1648 IS dummy; 1649 1650 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1651 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1652 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1653 } 1654 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1655 } 1656 1657 /* Matrix of coarse basis functions (local) */ 1658 if (pcbddc->coarse_phi_B) { 1659 PetscInt on_B,on_primal,on_D=n_D; 1660 if (pcbddc->coarse_phi_D) { 1661 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1662 } 1663 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1664 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1665 PetscScalar *marray; 1666 1667 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1668 ierr = PetscFree(marray);CHKERRQ(ierr); 1669 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1670 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1671 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1672 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1673 } 1674 } 1675 1676 if (!pcbddc->coarse_phi_B) { 1677 PetscScalar *marray; 1678 1679 n = n_B*pcbddc->local_primal_size; 1680 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1681 n += n_D*pcbddc->local_primal_size; 1682 } 1683 if (!pcbddc->symmetric_primal) { 1684 n *= 2; 1685 } 1686 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1687 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1688 n = n_B*pcbddc->local_primal_size; 1689 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1690 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1691 n += n_D*pcbddc->local_primal_size; 1692 } 1693 if (!pcbddc->symmetric_primal) { 1694 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1695 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1696 n = n_B*pcbddc->local_primal_size; 1697 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1698 } 1699 } else { 1700 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1701 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1702 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1703 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1704 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1705 } 1706 } 1707 } 1708 1709 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1710 p0_lidx_I = NULL; 1711 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1712 const PetscInt *idxs; 1713 1714 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1715 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1716 for (i=0;i<pcbddc->benign_n;i++) { 1717 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1718 } 1719 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1720 } 1721 1722 /* vertices */ 1723 if (n_vertices) { 1724 1725 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1726 1727 if (n_R) { 1728 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1729 PetscBLASInt B_N,B_one = 1; 1730 PetscScalar *x,*y; 1731 PetscBool isseqaij; 1732 1733 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1734 if (need_benign_correction) { 1735 ISLocalToGlobalMapping RtoN; 1736 IS is_p0; 1737 PetscInt *idxs_p0,n; 1738 1739 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1740 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1741 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1742 if (n != pcbddc->benign_n) { 1743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1744 } 1745 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1746 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1747 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1748 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1749 } 1750 1751 if (lda_rhs == n_R) { 1752 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1753 } else { 1754 PetscScalar *av,*array; 1755 const PetscInt *xadj,*adjncy; 1756 PetscInt n; 1757 PetscBool flg_row; 1758 1759 array = work+lda_rhs*n_vertices; 1760 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1761 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1762 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1763 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1764 for (i=0;i<n;i++) { 1765 PetscInt j; 1766 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1767 } 1768 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1769 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1771 } 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1773 if (F) { 1774 if (need_benign_correction) { 1775 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1776 PetscScalar *marr; 1777 1778 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1779 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1780 1781 | 0 0 0 | (V) 1782 L = | 0 0 -1 | (P-p0) 1783 | 0 0 -1 | (p0) 1784 1785 */ 1786 for (i=0;i<reuse_solver->benign_n;i++) { 1787 const PetscScalar *vals; 1788 const PetscInt *idxs,*idxs_zero; 1789 PetscInt n,j,nz; 1790 1791 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1792 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1793 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1794 for (j=0;j<n;j++) { 1795 PetscScalar val = vals[j]; 1796 PetscInt k,col = idxs[j]; 1797 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1798 } 1799 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1800 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1801 } 1802 /* need to correct the rhs */ 1803 for (i=0;i<n_vertices;i++) { 1804 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1805 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1806 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1807 } 1808 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1809 } 1810 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1811 /* need to correct the solution */ 1812 if (need_benign_correction) { 1813 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1814 PetscScalar *marr; 1815 1816 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1817 for (i=0;i<n_vertices;i++) { 1818 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1819 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1821 } 1822 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1823 } 1824 } else { 1825 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1826 for (i=0;i<n_vertices;i++) { 1827 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1828 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1829 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1834 } 1835 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1836 /* S_VV and S_CV */ 1837 if (n_constraints) { 1838 Mat B; 1839 1840 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1841 for (i=0;i<n_vertices;i++) { 1842 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1843 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1844 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1848 } 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1850 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1851 ierr = MatDestroy(&B);CHKERRQ(ierr); 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1853 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1854 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1856 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1857 ierr = MatDestroy(&B);CHKERRQ(ierr); 1858 } 1859 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1860 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1861 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1862 } 1863 if (lda_rhs != n_R) { 1864 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1866 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1867 } 1868 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1869 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1870 if (need_benign_correction) { 1871 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1872 PetscScalar *marr,*sums; 1873 1874 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1875 ierr = MatDenseGetArray(S_VVt,&marr); 1876 for (i=0;i<reuse_solver->benign_n;i++) { 1877 const PetscScalar *vals; 1878 const PetscInt *idxs,*idxs_zero; 1879 PetscInt n,j,nz; 1880 1881 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1882 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1883 for (j=0;j<n_vertices;j++) { 1884 PetscInt k; 1885 sums[j] = 0.; 1886 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1887 } 1888 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1889 for (j=0;j<n;j++) { 1890 PetscScalar val = vals[j]; 1891 PetscInt k; 1892 for (k=0;k<n_vertices;k++) { 1893 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1894 } 1895 } 1896 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1897 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1898 } 1899 ierr = PetscFree(sums);CHKERRQ(ierr); 1900 ierr = MatDenseRestoreArray(S_VVt,&marr); 1901 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1902 } 1903 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1904 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1905 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1906 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1907 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1908 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1909 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1910 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1911 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1912 } else { 1913 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1914 } 1915 for (i=0;i<pcbddc->benign_n;i++) { 1916 const PetscScalar *vals; 1917 const PetscInt *idxs; 1918 PetscInt n,j,primal_idx; 1919 1920 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1921 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1922 for (j=0;j<n;j++) { 1923 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1924 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1925 } 1926 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1927 } 1928 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1929 1930 /* coarse basis functions */ 1931 for (i=0;i<n_vertices;i++) { 1932 PetscScalar *y; 1933 1934 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1935 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1936 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1937 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1938 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1939 y[n_B*i+idx_V_B[i]] = 1.0; 1940 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1941 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1942 1943 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1944 PetscInt j; 1945 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1951 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1952 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1953 } 1954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1955 } 1956 /* if n_R == 0 the object is not destroyed */ 1957 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1958 } 1959 1960 if (n_constraints) { 1961 Mat B; 1962 1963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1964 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1965 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1966 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1967 if (n_vertices) { 1968 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1969 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1970 } else { 1971 Mat S_VCt; 1972 1973 if (lda_rhs != n_R) { 1974 ierr = MatDestroy(&B);CHKERRQ(ierr); 1975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1976 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1977 } 1978 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1979 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1980 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1981 } 1982 } 1983 ierr = MatDestroy(&B);CHKERRQ(ierr); 1984 /* coarse basis functions */ 1985 for (i=0;i<n_constraints;i++) { 1986 PetscScalar *y; 1987 1988 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1989 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1990 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1994 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1995 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1996 PetscInt j; 1997 1998 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2003 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2005 } 2006 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2007 } 2008 } 2009 if (n_constraints) { 2010 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2011 } 2012 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2013 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2014 2015 /* compute other basis functions for non-symmetric problems */ 2016 if (!pcbddc->symmetric_primal) { 2017 Mat B_V=NULL,B_C=NULL; 2018 PetscScalar *marray; 2019 2020 if (n_constraints) { 2021 Mat S_CCT,C_CRT; 2022 2023 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2024 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2025 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2026 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2027 if (n_vertices) { 2028 Mat S_VCT; 2029 2030 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2031 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2032 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2033 } 2034 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2035 } 2036 if (n_vertices && n_R) { 2037 PetscScalar *av,*marray; 2038 const PetscInt *xadj,*adjncy; 2039 PetscInt n; 2040 PetscBool flg_row; 2041 2042 /* B_V = B_V - A_VR^T */ 2043 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2044 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2045 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2046 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2047 for (i=0;i<n;i++) { 2048 PetscInt j; 2049 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2050 } 2051 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2052 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2053 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2054 } 2055 2056 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2057 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2058 for (i=0;i<n_vertices;i++) { 2059 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2060 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2061 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2062 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2063 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2064 } 2065 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2066 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2067 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2068 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2069 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2070 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2071 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2072 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2073 } 2074 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2075 /* coarse basis functions */ 2076 for (i=0;i<pcbddc->local_primal_size;i++) { 2077 PetscScalar *y; 2078 2079 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 if (i<n_vertices) { 2085 y[n_B*i+idx_V_B[i]] = 1.0; 2086 } 2087 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2088 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2089 2090 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2091 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2092 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2096 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2097 } 2098 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2099 } 2100 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2101 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2102 } 2103 /* free memory */ 2104 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2105 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2106 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2107 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2108 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2109 ierr = PetscFree(work);CHKERRQ(ierr); 2110 if (n_vertices) { 2111 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2112 } 2113 if (n_constraints) { 2114 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2115 } 2116 /* Checking coarse_sub_mat and coarse basis functios */ 2117 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2118 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2119 if (pcbddc->dbg_flag) { 2120 Mat coarse_sub_mat; 2121 Mat AUXMAT,TM1,TM2,TM3,TM4; 2122 Mat coarse_phi_D,coarse_phi_B; 2123 Mat coarse_psi_D,coarse_psi_B; 2124 Mat A_II,A_BB,A_IB,A_BI; 2125 Mat C_B,CPHI; 2126 IS is_dummy; 2127 Vec mones; 2128 MatType checkmattype=MATSEQAIJ; 2129 PetscReal real_value; 2130 2131 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2132 Mat A; 2133 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2134 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2135 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2136 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2137 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2138 ierr = MatDestroy(&A);CHKERRQ(ierr); 2139 } else { 2140 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2141 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2142 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2143 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2144 } 2145 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2146 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2147 if (!pcbddc->symmetric_primal) { 2148 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2149 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2150 } 2151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2152 2153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2156 if (!pcbddc->symmetric_primal) { 2157 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2158 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2159 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2160 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2161 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2162 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2163 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2164 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2165 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2166 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2167 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2169 } else { 2170 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2171 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2172 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2173 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2174 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2176 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2177 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2178 } 2179 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2180 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2181 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2182 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2183 if (pcbddc->benign_n) { 2184 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2185 PetscScalar *data,*data2; 2186 PetscInt j; 2187 2188 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2189 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2190 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2191 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2192 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2193 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2195 for (j=0;j<pcbddc->benign_n;j++) { 2196 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2197 for (i=0;i<pcbddc->local_primal_size;i++) { 2198 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2199 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2200 } 2201 } 2202 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2203 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2204 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2205 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2206 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2207 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2208 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2209 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2210 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2211 } 2212 #if 0 2213 { 2214 PetscViewer viewer; 2215 char filename[256]; 2216 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2217 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2218 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2220 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2221 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2222 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2223 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2224 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2225 if (pcbddc->coarse_phi_D) { 2226 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2227 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2228 } 2229 if (pcbddc->coarse_psi_B) { 2230 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2231 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2232 } 2233 if (pcbddc->coarse_phi_B) { 2234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2235 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2236 } 2237 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2238 } 2239 #endif 2240 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2241 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2244 2245 /* check constraints */ 2246 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2247 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2249 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2250 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2251 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2252 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2253 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2255 if (!pcbddc->symmetric_primal) { 2256 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2257 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2258 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2259 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2261 } 2262 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2263 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2264 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2265 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2266 } 2267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2268 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2269 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2270 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2271 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2272 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2273 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2274 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2275 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2276 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2277 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2278 if (!pcbddc->symmetric_primal) { 2279 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2280 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2281 } 2282 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2283 } 2284 /* get back data */ 2285 *coarse_submat_vals_n = coarse_submat_vals; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2291 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2292 { 2293 Mat *work_mat; 2294 IS isrow_s,iscol_s; 2295 PetscBool rsorted,csorted; 2296 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2301 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2302 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2303 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2304 2305 if (!rsorted) { 2306 const PetscInt *idxs; 2307 PetscInt *idxs_sorted,i; 2308 2309 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2310 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2311 for (i=0;i<rsize;i++) { 2312 idxs_perm_r[i] = i; 2313 } 2314 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2315 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2316 for (i=0;i<rsize;i++) { 2317 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2318 } 2319 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2320 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2321 } else { 2322 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2323 isrow_s = isrow; 2324 } 2325 2326 if (!csorted) { 2327 if (isrow == iscol) { 2328 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2329 iscol_s = isrow_s; 2330 } else { 2331 const PetscInt *idxs; 2332 PetscInt *idxs_sorted,i; 2333 2334 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2335 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2336 for (i=0;i<csize;i++) { 2337 idxs_perm_c[i] = i; 2338 } 2339 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2340 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2341 for (i=0;i<csize;i++) { 2342 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2343 } 2344 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2345 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2346 } 2347 } else { 2348 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2349 iscol_s = iscol; 2350 } 2351 2352 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2353 2354 if (!rsorted || !csorted) { 2355 Mat new_mat; 2356 IS is_perm_r,is_perm_c; 2357 2358 if (!rsorted) { 2359 PetscInt *idxs_r,i; 2360 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2361 for (i=0;i<rsize;i++) { 2362 idxs_r[idxs_perm_r[i]] = i; 2363 } 2364 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2365 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2366 } else { 2367 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2368 } 2369 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2370 2371 if (!csorted) { 2372 if (isrow_s == iscol_s) { 2373 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2374 is_perm_c = is_perm_r; 2375 } else { 2376 PetscInt *idxs_c,i; 2377 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2378 for (i=0;i<csize;i++) { 2379 idxs_c[idxs_perm_c[i]] = i; 2380 } 2381 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2382 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2383 } 2384 } else { 2385 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2386 } 2387 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2388 2389 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2390 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2391 work_mat[0] = new_mat; 2392 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2393 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2394 } 2395 2396 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2397 *B = work_mat[0]; 2398 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2399 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2400 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2406 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2407 { 2408 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2409 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2410 Mat new_mat; 2411 IS is_local,is_global; 2412 PetscInt local_size; 2413 PetscBool isseqaij; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2418 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2419 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2420 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2421 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2422 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2423 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2424 2425 /* check */ 2426 if (pcbddc->dbg_flag) { 2427 Vec x,x_change; 2428 PetscReal error; 2429 2430 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2431 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2432 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2433 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2436 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2438 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2439 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2442 ierr = VecDestroy(&x);CHKERRQ(ierr); 2443 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2444 } 2445 2446 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2447 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2448 if (isseqaij) { 2449 Mat M; 2450 2451 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2452 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2453 ierr = MatDestroy(&M);CHKERRQ(ierr); 2454 } else { 2455 Mat work_mat,M; 2456 2457 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2458 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2459 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2460 ierr = MatDestroy(&M);CHKERRQ(ierr); 2461 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2462 } 2463 if (matis->A->symmetric_set) { 2464 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2465 #if !defined(PETSC_USE_COMPLEX) 2466 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2467 #endif 2468 } 2469 /* 2470 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2471 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2472 */ 2473 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2479 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2480 { 2481 PC_IS* pcis = (PC_IS*)(pc->data); 2482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2484 PetscInt *idx_R_local=NULL; 2485 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2486 PetscInt vbs,bs; 2487 PetscBT bitmask=NULL; 2488 PetscErrorCode ierr; 2489 2490 PetscFunctionBegin; 2491 /* 2492 No need to setup local scatters if 2493 - primal space is unchanged 2494 AND 2495 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2496 AND 2497 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2498 */ 2499 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2500 PetscFunctionReturn(0); 2501 } 2502 /* destroy old objects */ 2503 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2504 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2505 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2506 /* Set Non-overlapping dimensions */ 2507 n_B = pcis->n_B; 2508 n_D = pcis->n - n_B; 2509 n_vertices = pcbddc->n_vertices; 2510 2511 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2512 2513 /* create auxiliary bitmask and allocate workspace */ 2514 if (!sub_schurs->reuse_solver) { 2515 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2516 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2517 for (i=0;i<n_vertices;i++) { 2518 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2519 } 2520 2521 for (i=0, n_R=0; i<pcis->n; i++) { 2522 if (!PetscBTLookup(bitmask,i)) { 2523 idx_R_local[n_R++] = i; 2524 } 2525 } 2526 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2527 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2528 2529 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2530 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2531 } 2532 2533 /* Block code */ 2534 vbs = 1; 2535 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2536 if (bs>1 && !(n_vertices%bs)) { 2537 PetscBool is_blocked = PETSC_TRUE; 2538 PetscInt *vary; 2539 if (!sub_schurs->reuse_solver) { 2540 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2541 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2542 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2543 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2544 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2545 for (i=0; i<pcis->n/bs; i++) { 2546 if (vary[i]!=0 && vary[i]!=bs) { 2547 is_blocked = PETSC_FALSE; 2548 break; 2549 } 2550 } 2551 ierr = PetscFree(vary);CHKERRQ(ierr); 2552 } else { 2553 /* Verify directly the R set */ 2554 for (i=0; i<n_R/bs; i++) { 2555 PetscInt j,node=idx_R_local[bs*i]; 2556 for (j=1; j<bs; j++) { 2557 if (node != idx_R_local[bs*i+j]-j) { 2558 is_blocked = PETSC_FALSE; 2559 break; 2560 } 2561 } 2562 } 2563 } 2564 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2565 vbs = bs; 2566 for (i=0;i<n_R/vbs;i++) { 2567 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2568 } 2569 } 2570 } 2571 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2572 if (sub_schurs->reuse_solver) { 2573 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2574 2575 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2576 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2577 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2578 reuse_solver->is_R = pcbddc->is_R_local; 2579 } else { 2580 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2581 } 2582 2583 /* print some info if requested */ 2584 if (pcbddc->dbg_flag) { 2585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2587 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2592 } 2593 2594 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2595 if (!sub_schurs->reuse_solver) { 2596 IS is_aux1,is_aux2; 2597 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2598 2599 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2600 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2601 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2602 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2603 for (i=0; i<n_D; i++) { 2604 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2605 } 2606 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2607 for (i=0, j=0; i<n_R; i++) { 2608 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2609 aux_array1[j++] = i; 2610 } 2611 } 2612 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 for (i=0, j=0; i<n_B; i++) { 2615 if (!PetscBTLookup(bitmask,is_indices[i])) { 2616 aux_array2[j++] = i; 2617 } 2618 } 2619 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2620 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2621 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2622 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2623 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2624 2625 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2626 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2627 for (i=0, j=0; i<n_R; i++) { 2628 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2629 aux_array1[j++] = i; 2630 } 2631 } 2632 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2633 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2634 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2635 } 2636 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2637 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2638 } else { 2639 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2640 IS tis; 2641 PetscInt schur_size; 2642 2643 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2644 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2645 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2646 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2647 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2648 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2649 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2650 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2651 } 2652 } 2653 PetscFunctionReturn(0); 2654 } 2655 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2659 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2660 { 2661 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2662 PC_IS *pcis = (PC_IS*)pc->data; 2663 PC pc_temp; 2664 Mat A_RR; 2665 MatReuse reuse; 2666 PetscScalar m_one = -1.0; 2667 PetscReal value; 2668 PetscInt n_D,n_R; 2669 PetscBool use_exact,use_exact_reduced,issbaij; 2670 PetscErrorCode ierr; 2671 /* prefixes stuff */ 2672 char dir_prefix[256],neu_prefix[256],str_level[16]; 2673 size_t len; 2674 2675 PetscFunctionBegin; 2676 2677 /* compute prefixes */ 2678 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2679 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2680 if (!pcbddc->current_level) { 2681 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2682 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2683 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2684 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2685 } else { 2686 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2687 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2688 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2689 len -= 15; /* remove "pc_bddc_coarse_" */ 2690 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2691 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2692 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2693 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2694 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2695 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2696 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2697 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2698 } 2699 2700 /* DIRICHLET PROBLEM */ 2701 if (dirichlet) { 2702 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2703 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2704 Mat A_IIn; 2705 2706 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2707 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2708 pcis->A_II = A_IIn; 2709 } 2710 if (pcbddc->local_mat->symmetric_set) { 2711 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2712 } 2713 /* Matrix for Dirichlet problem is pcis->A_II */ 2714 n_D = pcis->n - pcis->n_B; 2715 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2716 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2717 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2718 /* default */ 2719 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2720 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2721 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2722 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2723 if (issbaij) { 2724 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2725 } else { 2726 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2727 } 2728 /* Allow user's customization */ 2729 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2730 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2731 } 2732 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2733 if (sub_schurs->reuse_solver) { 2734 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2735 2736 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2737 } 2738 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2739 if (!n_D) { 2740 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2741 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2742 } 2743 /* Set Up KSP for Dirichlet problem of BDDC */ 2744 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2745 /* set ksp_D into pcis data */ 2746 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2747 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2748 pcis->ksp_D = pcbddc->ksp_D; 2749 } 2750 2751 /* NEUMANN PROBLEM */ 2752 A_RR = 0; 2753 if (neumann) { 2754 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2755 PetscInt ibs,mbs; 2756 PetscBool issbaij; 2757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2758 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2759 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2760 if (pcbddc->ksp_R) { /* already created ksp */ 2761 PetscInt nn_R; 2762 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2763 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2764 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2765 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2766 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2767 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2768 reuse = MAT_INITIAL_MATRIX; 2769 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2770 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2771 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2772 reuse = MAT_INITIAL_MATRIX; 2773 } else { /* safe to reuse the matrix */ 2774 reuse = MAT_REUSE_MATRIX; 2775 } 2776 } 2777 /* last check */ 2778 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2779 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2780 reuse = MAT_INITIAL_MATRIX; 2781 } 2782 } else { /* first time, so we need to create the matrix */ 2783 reuse = MAT_INITIAL_MATRIX; 2784 } 2785 /* extract A_RR */ 2786 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2787 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2788 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2789 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2790 if (matis->A == pcbddc->local_mat) { 2791 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2792 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2793 } else { 2794 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2795 } 2796 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2797 if (matis->A == pcbddc->local_mat) { 2798 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2799 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2800 } else { 2801 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2802 } 2803 } 2804 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2805 if (pcbddc->local_mat->symmetric_set) { 2806 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2807 } 2808 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2809 Mat A_RRn; 2810 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2811 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2812 A_RR = A_RRn; 2813 } 2814 if (!pcbddc->ksp_R) { /* create object if not present */ 2815 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2816 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2817 /* default */ 2818 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2819 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2820 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2821 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2822 if (issbaij) { 2823 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2824 } else { 2825 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2826 } 2827 /* Allow user's customization */ 2828 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2829 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2830 } 2831 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2832 if (!n_R) { 2833 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2834 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2835 } 2836 /* Reuse solver if it is present */ 2837 if (sub_schurs->reuse_solver) { 2838 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2839 2840 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2841 } 2842 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2843 /* Set Up KSP for Neumann problem of BDDC */ 2844 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2845 } 2846 /* free Neumann problem's matrix */ 2847 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2848 2849 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2850 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2851 if (pcbddc->dbg_flag) { 2852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2853 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2854 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2855 } 2856 if (dirichlet) { /* Dirichlet */ 2857 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2858 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2859 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2860 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2861 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2862 /* need to be adapted? */ 2863 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2864 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2865 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2866 /* print info */ 2867 if (pcbddc->dbg_flag) { 2868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2870 } 2871 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2872 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2873 } 2874 } 2875 if (neumann) { /* Neumann */ 2876 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2877 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2878 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2879 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2880 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2881 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2882 /* need to be adapted? */ 2883 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2884 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2885 /* print info */ 2886 if (pcbddc->dbg_flag) { 2887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2889 } 2890 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2891 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2892 } 2893 } 2894 } 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2900 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2901 { 2902 PetscErrorCode ierr; 2903 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2904 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2905 2906 PetscFunctionBegin; 2907 if (!sub_schurs->reuse_solver) { 2908 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2909 } 2910 if (!pcbddc->switch_static) { 2911 if (applytranspose && pcbddc->local_auxmat1) { 2912 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2913 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2914 } 2915 if (!sub_schurs->reuse_solver) { 2916 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2918 } else { 2919 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2920 2921 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2922 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2923 } 2924 } else { 2925 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2926 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2927 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2928 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2929 if (applytranspose && pcbddc->local_auxmat1) { 2930 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2931 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2932 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 } 2935 } 2936 if (!sub_schurs->reuse_solver) { 2937 if (applytranspose) { 2938 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2939 } else { 2940 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2941 } 2942 } else { 2943 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2944 2945 if (applytranspose) { 2946 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2947 } else { 2948 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2949 } 2950 } 2951 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2952 if (!pcbddc->switch_static) { 2953 if (!sub_schurs->reuse_solver) { 2954 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2955 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2956 } else { 2957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2958 2959 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2960 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2961 } 2962 if (!applytranspose && pcbddc->local_auxmat1) { 2963 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2964 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2965 } 2966 } else { 2967 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2968 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2969 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2970 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 if (!applytranspose && pcbddc->local_auxmat1) { 2972 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2973 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2974 } 2975 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2976 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2977 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2978 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2986 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2987 { 2988 PetscErrorCode ierr; 2989 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2990 PC_IS* pcis = (PC_IS*) (pc->data); 2991 const PetscScalar zero = 0.0; 2992 2993 PetscFunctionBegin; 2994 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2995 if (applytranspose) { 2996 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2997 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2998 } else { 2999 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3000 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3001 } 3002 3003 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3004 if (pcbddc->benign_n) { 3005 PetscScalar *array; 3006 PetscInt j; 3007 3008 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3009 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3010 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3011 } 3012 3013 /* start communications from local primal nodes to rhs of coarse solver */ 3014 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3015 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3016 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3017 3018 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3019 /* TODO remove null space when doing multilevel */ 3020 if (pcbddc->coarse_ksp) { 3021 Mat coarse_mat; 3022 Vec rhs,sol; 3023 MatNullSpace nullsp; 3024 3025 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3026 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3027 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3028 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3029 if (nullsp) { 3030 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3031 } 3032 if (applytranspose) { 3033 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3034 } else { 3035 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3036 } 3037 if (nullsp) { 3038 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3039 } 3040 } 3041 3042 /* Local solution on R nodes */ 3043 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 3044 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3045 } 3046 3047 /* communications from coarse sol to local primal nodes */ 3048 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3049 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3050 3051 /* Sum contributions from two levels */ 3052 if (applytranspose) { 3053 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3054 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3055 } else { 3056 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3057 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3058 } 3059 /* store p0 */ 3060 if (pcbddc->benign_n) { 3061 PetscScalar *array; 3062 PetscInt j; 3063 3064 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3065 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3066 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3067 } 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3073 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3074 { 3075 PetscErrorCode ierr; 3076 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3077 PetscScalar *array; 3078 Vec from,to; 3079 3080 PetscFunctionBegin; 3081 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3082 from = pcbddc->coarse_vec; 3083 to = pcbddc->vec1_P; 3084 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3085 Vec tvec; 3086 3087 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3088 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3089 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3090 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3091 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3092 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3093 } 3094 } else { /* from local to global -> put data in coarse right hand side */ 3095 from = pcbddc->vec1_P; 3096 to = pcbddc->coarse_vec; 3097 } 3098 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3099 PetscFunctionReturn(0); 3100 } 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3104 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3105 { 3106 PetscErrorCode ierr; 3107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3108 PetscScalar *array; 3109 Vec from,to; 3110 3111 PetscFunctionBegin; 3112 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3113 from = pcbddc->coarse_vec; 3114 to = pcbddc->vec1_P; 3115 } else { /* from local to global -> put data in coarse right hand side */ 3116 from = pcbddc->vec1_P; 3117 to = pcbddc->coarse_vec; 3118 } 3119 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3120 if (smode == SCATTER_FORWARD) { 3121 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3122 Vec tvec; 3123 3124 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3125 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3126 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3127 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3128 } 3129 } else { 3130 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3131 ierr = VecResetArray(from);CHKERRQ(ierr); 3132 } 3133 } 3134 PetscFunctionReturn(0); 3135 } 3136 3137 /* uncomment for testing purposes */ 3138 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3139 #undef __FUNCT__ 3140 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3141 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3142 { 3143 PetscErrorCode ierr; 3144 PC_IS* pcis = (PC_IS*)(pc->data); 3145 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3146 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3147 /* one and zero */ 3148 PetscScalar one=1.0,zero=0.0; 3149 /* space to store constraints and their local indices */ 3150 PetscScalar *constraints_data; 3151 PetscInt *constraints_idxs,*constraints_idxs_B; 3152 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3153 PetscInt *constraints_n; 3154 /* iterators */ 3155 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3156 /* BLAS integers */ 3157 PetscBLASInt lwork,lierr; 3158 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3159 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3160 /* reuse */ 3161 PetscInt olocal_primal_size,olocal_primal_size_cc; 3162 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3163 /* change of basis */ 3164 PetscBool qr_needed; 3165 PetscBT change_basis,qr_needed_idx; 3166 /* auxiliary stuff */ 3167 PetscInt *nnz,*is_indices; 3168 PetscInt ncc; 3169 /* some quantities */ 3170 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3171 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3172 3173 PetscFunctionBegin; 3174 /* Destroy Mat objects computed previously */ 3175 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3176 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3177 /* save info on constraints from previous setup (if any) */ 3178 olocal_primal_size = pcbddc->local_primal_size; 3179 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3180 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3181 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3182 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3183 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3184 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3185 3186 /* print some info */ 3187 if (pcbddc->dbg_flag) { 3188 IS vertices; 3189 PetscInt nv,nedges,nfaces; 3190 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3191 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3192 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3193 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3194 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3197 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3199 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3200 } 3201 3202 if (!pcbddc->adaptive_selection) { 3203 IS ISForVertices,*ISForFaces,*ISForEdges; 3204 MatNullSpace nearnullsp; 3205 const Vec *nearnullvecs; 3206 Vec *localnearnullsp; 3207 PetscScalar *array; 3208 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3209 PetscBool nnsp_has_cnst; 3210 /* LAPACK working arrays for SVD or POD */ 3211 PetscBool skip_lapack,boolforchange; 3212 PetscScalar *work; 3213 PetscReal *singular_vals; 3214 #if defined(PETSC_USE_COMPLEX) 3215 PetscReal *rwork; 3216 #endif 3217 #if defined(PETSC_MISSING_LAPACK_GESVD) 3218 PetscScalar *temp_basis,*correlation_mat; 3219 #else 3220 PetscBLASInt dummy_int=1; 3221 PetscScalar dummy_scalar=1.; 3222 #endif 3223 3224 /* Get index sets for faces, edges and vertices from graph */ 3225 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3226 /* free unneeded index sets */ 3227 if (!pcbddc->use_vertices) { 3228 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3229 } 3230 if (!pcbddc->use_edges) { 3231 for (i=0;i<n_ISForEdges;i++) { 3232 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3233 } 3234 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3235 n_ISForEdges = 0; 3236 } 3237 if (!pcbddc->use_faces) { 3238 for (i=0;i<n_ISForFaces;i++) { 3239 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3240 } 3241 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3242 n_ISForFaces = 0; 3243 } 3244 3245 #if defined(PETSC_USE_DEBUG) 3246 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3247 Also use_change_of_basis should be consistent among processors */ 3248 if (pcbddc->NullSpace) { 3249 PetscBool tbool[2],gbool[2]; 3250 3251 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3252 pcbddc->use_change_of_basis = PETSC_TRUE; 3253 if (!ISForEdges) { 3254 pcbddc->use_change_on_faces = PETSC_TRUE; 3255 } 3256 } 3257 tbool[0] = pcbddc->use_change_of_basis; 3258 tbool[1] = pcbddc->use_change_on_faces; 3259 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3260 pcbddc->use_change_of_basis = gbool[0]; 3261 pcbddc->use_change_on_faces = gbool[1]; 3262 } 3263 #endif 3264 3265 /* check if near null space is attached to global mat */ 3266 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3267 if (nearnullsp) { 3268 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3269 /* remove any stored info */ 3270 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3271 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3272 /* store information for BDDC solver reuse */ 3273 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3274 pcbddc->onearnullspace = nearnullsp; 3275 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3276 for (i=0;i<nnsp_size;i++) { 3277 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3278 } 3279 } else { /* if near null space is not provided BDDC uses constants by default */ 3280 nnsp_size = 0; 3281 nnsp_has_cnst = PETSC_TRUE; 3282 } 3283 /* get max number of constraints on a single cc */ 3284 max_constraints = nnsp_size; 3285 if (nnsp_has_cnst) max_constraints++; 3286 3287 /* 3288 Evaluate maximum storage size needed by the procedure 3289 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3290 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3291 There can be multiple constraints per connected component 3292 */ 3293 n_vertices = 0; 3294 if (ISForVertices) { 3295 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3296 } 3297 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3298 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3299 3300 total_counts = n_ISForFaces+n_ISForEdges; 3301 total_counts *= max_constraints; 3302 total_counts += n_vertices; 3303 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3304 3305 total_counts = 0; 3306 max_size_of_constraint = 0; 3307 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3308 IS used_is; 3309 if (i<n_ISForEdges) { 3310 used_is = ISForEdges[i]; 3311 } else { 3312 used_is = ISForFaces[i-n_ISForEdges]; 3313 } 3314 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3315 total_counts += j; 3316 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3317 } 3318 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3319 3320 /* get local part of global near null space vectors */ 3321 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3322 for (k=0;k<nnsp_size;k++) { 3323 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3324 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3325 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3326 } 3327 3328 /* whether or not to skip lapack calls */ 3329 skip_lapack = PETSC_TRUE; 3330 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3331 3332 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3333 if (!skip_lapack) { 3334 PetscScalar temp_work; 3335 3336 #if defined(PETSC_MISSING_LAPACK_GESVD) 3337 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3338 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3339 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3340 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3341 #if defined(PETSC_USE_COMPLEX) 3342 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3343 #endif 3344 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3345 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3346 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3347 lwork = -1; 3348 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3349 #if !defined(PETSC_USE_COMPLEX) 3350 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3351 #else 3352 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3353 #endif 3354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3355 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3356 #else /* on missing GESVD */ 3357 /* SVD */ 3358 PetscInt max_n,min_n; 3359 max_n = max_size_of_constraint; 3360 min_n = max_constraints; 3361 if (max_size_of_constraint < max_constraints) { 3362 min_n = max_size_of_constraint; 3363 max_n = max_constraints; 3364 } 3365 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3366 #if defined(PETSC_USE_COMPLEX) 3367 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3368 #endif 3369 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3370 lwork = -1; 3371 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3372 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3373 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3374 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3375 #if !defined(PETSC_USE_COMPLEX) 3376 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3377 #else 3378 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3379 #endif 3380 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3381 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3382 #endif /* on missing GESVD */ 3383 /* Allocate optimal workspace */ 3384 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3385 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3386 } 3387 /* Now we can loop on constraining sets */ 3388 total_counts = 0; 3389 constraints_idxs_ptr[0] = 0; 3390 constraints_data_ptr[0] = 0; 3391 /* vertices */ 3392 if (n_vertices) { 3393 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3394 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3395 for (i=0;i<n_vertices;i++) { 3396 constraints_n[total_counts] = 1; 3397 constraints_data[total_counts] = 1.0; 3398 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3399 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3400 total_counts++; 3401 } 3402 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3403 n_vertices = total_counts; 3404 } 3405 3406 /* edges and faces */ 3407 total_counts_cc = total_counts; 3408 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3409 IS used_is; 3410 PetscBool idxs_copied = PETSC_FALSE; 3411 3412 if (ncc<n_ISForEdges) { 3413 used_is = ISForEdges[ncc]; 3414 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3415 } else { 3416 used_is = ISForFaces[ncc-n_ISForEdges]; 3417 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3418 } 3419 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3420 3421 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3422 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3423 /* change of basis should not be performed on local periodic nodes */ 3424 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3425 if (nnsp_has_cnst) { 3426 PetscScalar quad_value; 3427 3428 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3429 idxs_copied = PETSC_TRUE; 3430 3431 if (!pcbddc->use_nnsp_true) { 3432 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3433 } else { 3434 quad_value = 1.0; 3435 } 3436 for (j=0;j<size_of_constraint;j++) { 3437 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3438 } 3439 temp_constraints++; 3440 total_counts++; 3441 } 3442 for (k=0;k<nnsp_size;k++) { 3443 PetscReal real_value; 3444 PetscScalar *ptr_to_data; 3445 3446 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3447 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3448 for (j=0;j<size_of_constraint;j++) { 3449 ptr_to_data[j] = array[is_indices[j]]; 3450 } 3451 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3452 /* check if array is null on the connected component */ 3453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3454 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3455 if (real_value > 0.0) { /* keep indices and values */ 3456 temp_constraints++; 3457 total_counts++; 3458 if (!idxs_copied) { 3459 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3460 idxs_copied = PETSC_TRUE; 3461 } 3462 } 3463 } 3464 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3465 valid_constraints = temp_constraints; 3466 if (!pcbddc->use_nnsp_true && temp_constraints) { 3467 if (temp_constraints == 1) { /* just normalize the constraint */ 3468 PetscScalar norm,*ptr_to_data; 3469 3470 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3472 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3473 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3474 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3475 } else { /* perform SVD */ 3476 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3477 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3478 3479 #if defined(PETSC_MISSING_LAPACK_GESVD) 3480 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3481 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3482 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3483 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3484 from that computed using LAPACKgesvd 3485 -> This is due to a different computation of eigenvectors in LAPACKheev 3486 -> The quality of the POD-computed basis will be the same */ 3487 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3488 /* Store upper triangular part of correlation matrix */ 3489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3491 for (j=0;j<temp_constraints;j++) { 3492 for (k=0;k<j+1;k++) { 3493 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3494 } 3495 } 3496 /* compute eigenvalues and eigenvectors of correlation matrix */ 3497 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3499 #if !defined(PETSC_USE_COMPLEX) 3500 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3501 #else 3502 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3503 #endif 3504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3505 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3506 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3507 j = 0; 3508 while (j < temp_constraints && singular_vals[j] < tol) j++; 3509 total_counts = total_counts-j; 3510 valid_constraints = temp_constraints-j; 3511 /* scale and copy POD basis into used quadrature memory */ 3512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3513 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3516 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3518 if (j<temp_constraints) { 3519 PetscInt ii; 3520 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3521 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3522 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3523 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3524 for (k=0;k<temp_constraints-j;k++) { 3525 for (ii=0;ii<size_of_constraint;ii++) { 3526 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3527 } 3528 } 3529 } 3530 #else /* on missing GESVD */ 3531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3532 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3534 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3535 #if !defined(PETSC_USE_COMPLEX) 3536 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3537 #else 3538 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3539 #endif 3540 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3542 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3543 k = temp_constraints; 3544 if (k > size_of_constraint) k = size_of_constraint; 3545 j = 0; 3546 while (j < k && singular_vals[k-j-1] < tol) j++; 3547 valid_constraints = k-j; 3548 total_counts = total_counts-temp_constraints+valid_constraints; 3549 #endif /* on missing GESVD */ 3550 } 3551 } 3552 /* update pointers information */ 3553 if (valid_constraints) { 3554 constraints_n[total_counts_cc] = valid_constraints; 3555 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3556 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3557 /* set change_of_basis flag */ 3558 if (boolforchange) { 3559 PetscBTSet(change_basis,total_counts_cc); 3560 } 3561 total_counts_cc++; 3562 } 3563 } 3564 /* free workspace */ 3565 if (!skip_lapack) { 3566 ierr = PetscFree(work);CHKERRQ(ierr); 3567 #if defined(PETSC_USE_COMPLEX) 3568 ierr = PetscFree(rwork);CHKERRQ(ierr); 3569 #endif 3570 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3571 #if defined(PETSC_MISSING_LAPACK_GESVD) 3572 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3573 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3574 #endif 3575 } 3576 for (k=0;k<nnsp_size;k++) { 3577 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3578 } 3579 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3580 /* free index sets of faces, edges and vertices */ 3581 for (i=0;i<n_ISForFaces;i++) { 3582 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3583 } 3584 if (n_ISForFaces) { 3585 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3586 } 3587 for (i=0;i<n_ISForEdges;i++) { 3588 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3589 } 3590 if (n_ISForEdges) { 3591 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3592 } 3593 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3594 } else { 3595 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3596 3597 total_counts = 0; 3598 n_vertices = 0; 3599 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3600 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3601 } 3602 max_constraints = 0; 3603 total_counts_cc = 0; 3604 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3605 total_counts += pcbddc->adaptive_constraints_n[i]; 3606 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3607 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3608 } 3609 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3610 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3611 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3612 constraints_data = pcbddc->adaptive_constraints_data; 3613 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3614 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3615 total_counts_cc = 0; 3616 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3617 if (pcbddc->adaptive_constraints_n[i]) { 3618 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3619 } 3620 } 3621 #if 0 3622 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3623 for (i=0;i<total_counts_cc;i++) { 3624 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3625 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3626 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3627 printf(" %d",constraints_idxs[j]); 3628 } 3629 printf("\n"); 3630 printf("number of cc: %d\n",constraints_n[i]); 3631 } 3632 for (i=0;i<n_vertices;i++) { 3633 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3634 } 3635 for (i=0;i<sub_schurs->n_subs;i++) { 3636 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3637 } 3638 #endif 3639 3640 max_size_of_constraint = 0; 3641 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3642 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3643 /* Change of basis */ 3644 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3645 if (pcbddc->use_change_of_basis) { 3646 for (i=0;i<sub_schurs->n_subs;i++) { 3647 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3648 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3649 } 3650 } 3651 } 3652 } 3653 pcbddc->local_primal_size = total_counts; 3654 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3655 3656 /* map constraints_idxs in boundary numbering */ 3657 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3658 if (i != constraints_idxs_ptr[total_counts_cc]) { 3659 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3660 } 3661 3662 /* Create constraint matrix */ 3663 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3664 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3665 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3666 3667 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3668 /* determine if a QR strategy is needed for change of basis */ 3669 qr_needed = PETSC_FALSE; 3670 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3671 total_primal_vertices=0; 3672 pcbddc->local_primal_size_cc = 0; 3673 for (i=0;i<total_counts_cc;i++) { 3674 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3675 if (size_of_constraint == 1) { 3676 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3677 pcbddc->local_primal_size_cc += 1; 3678 } else if (PetscBTLookup(change_basis,i)) { 3679 for (k=0;k<constraints_n[i];k++) { 3680 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3681 } 3682 pcbddc->local_primal_size_cc += constraints_n[i]; 3683 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3684 PetscBTSet(qr_needed_idx,i); 3685 qr_needed = PETSC_TRUE; 3686 } 3687 } else { 3688 pcbddc->local_primal_size_cc += 1; 3689 } 3690 } 3691 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3692 pcbddc->n_vertices = total_primal_vertices; 3693 /* permute indices in order to have a sorted set of vertices */ 3694 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3695 3696 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3697 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3698 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3699 3700 /* nonzero structure of constraint matrix */ 3701 /* and get reference dof for local constraints */ 3702 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3703 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3704 3705 j = total_primal_vertices; 3706 total_counts = total_primal_vertices; 3707 cum = total_primal_vertices; 3708 for (i=n_vertices;i<total_counts_cc;i++) { 3709 if (!PetscBTLookup(change_basis,i)) { 3710 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3711 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3712 cum++; 3713 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3714 for (k=0;k<constraints_n[i];k++) { 3715 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3716 nnz[j+k] = size_of_constraint; 3717 } 3718 j += constraints_n[i]; 3719 } 3720 } 3721 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3722 ierr = PetscFree(nnz);CHKERRQ(ierr); 3723 3724 /* set values in constraint matrix */ 3725 for (i=0;i<total_primal_vertices;i++) { 3726 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3727 } 3728 total_counts = total_primal_vertices; 3729 for (i=n_vertices;i<total_counts_cc;i++) { 3730 if (!PetscBTLookup(change_basis,i)) { 3731 PetscInt *cols; 3732 3733 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3734 cols = constraints_idxs+constraints_idxs_ptr[i]; 3735 for (k=0;k<constraints_n[i];k++) { 3736 PetscInt row = total_counts+k; 3737 PetscScalar *vals; 3738 3739 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3740 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3741 } 3742 total_counts += constraints_n[i]; 3743 } 3744 } 3745 /* assembling */ 3746 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3747 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3748 3749 /* 3750 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3751 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3752 */ 3753 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3754 if (pcbddc->use_change_of_basis) { 3755 /* dual and primal dofs on a single cc */ 3756 PetscInt dual_dofs,primal_dofs; 3757 /* working stuff for GEQRF */ 3758 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3759 PetscBLASInt lqr_work; 3760 /* working stuff for UNGQR */ 3761 PetscScalar *gqr_work,lgqr_work_t; 3762 PetscBLASInt lgqr_work; 3763 /* working stuff for TRTRS */ 3764 PetscScalar *trs_rhs; 3765 PetscBLASInt Blas_NRHS; 3766 /* pointers for values insertion into change of basis matrix */ 3767 PetscInt *start_rows,*start_cols; 3768 PetscScalar *start_vals; 3769 /* working stuff for values insertion */ 3770 PetscBT is_primal; 3771 PetscInt *aux_primal_numbering_B; 3772 /* matrix sizes */ 3773 PetscInt global_size,local_size; 3774 /* temporary change of basis */ 3775 Mat localChangeOfBasisMatrix; 3776 /* extra space for debugging */ 3777 PetscScalar *dbg_work; 3778 3779 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3780 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3781 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3782 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3783 /* nonzeros for local mat */ 3784 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3785 for (i=0;i<pcis->n;i++) nnz[i]=1; 3786 for (i=n_vertices;i<total_counts_cc;i++) { 3787 if (PetscBTLookup(change_basis,i)) { 3788 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3789 if (PetscBTLookup(qr_needed_idx,i)) { 3790 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3791 } else { 3792 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3793 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3794 } 3795 } 3796 } 3797 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3798 ierr = PetscFree(nnz);CHKERRQ(ierr); 3799 /* Set initial identity in the matrix */ 3800 for (i=0;i<pcis->n;i++) { 3801 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3802 } 3803 3804 if (pcbddc->dbg_flag) { 3805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3807 } 3808 3809 3810 /* Now we loop on the constraints which need a change of basis */ 3811 /* 3812 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3813 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3814 3815 Basic blocks of change of basis matrix T computed by 3816 3817 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3818 3819 | 1 0 ... 0 s_1/S | 3820 | 0 1 ... 0 s_2/S | 3821 | ... | 3822 | 0 ... 1 s_{n-1}/S | 3823 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3824 3825 with S = \sum_{i=1}^n s_i^2 3826 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3827 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3828 3829 - QR decomposition of constraints otherwise 3830 */ 3831 if (qr_needed) { 3832 /* space to store Q */ 3833 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3834 /* first we issue queries for optimal work */ 3835 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3836 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3837 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3838 lqr_work = -1; 3839 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3840 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3841 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3842 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3843 lgqr_work = -1; 3844 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3845 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3846 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3847 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3848 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3849 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3850 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3851 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3852 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3853 /* array to store scaling factors for reflectors */ 3854 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3855 /* array to store rhs and solution of triangular solver */ 3856 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3857 /* allocating workspace for check */ 3858 if (pcbddc->dbg_flag) { 3859 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3860 } 3861 } 3862 /* array to store whether a node is primal or not */ 3863 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3864 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3865 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3866 if (i != total_primal_vertices) { 3867 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3868 } 3869 for (i=0;i<total_primal_vertices;i++) { 3870 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3871 } 3872 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3873 3874 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3875 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3876 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3877 if (PetscBTLookup(change_basis,total_counts)) { 3878 /* get constraint info */ 3879 primal_dofs = constraints_n[total_counts]; 3880 dual_dofs = size_of_constraint-primal_dofs; 3881 3882 if (pcbddc->dbg_flag) { 3883 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3884 } 3885 3886 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3887 3888 /* copy quadrature constraints for change of basis check */ 3889 if (pcbddc->dbg_flag) { 3890 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3891 } 3892 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3893 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3894 3895 /* compute QR decomposition of constraints */ 3896 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3897 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3899 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3900 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3901 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3902 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3903 3904 /* explictly compute R^-T */ 3905 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3906 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3907 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3908 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3910 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3911 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3912 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3913 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3914 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3915 3916 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3917 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3919 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3921 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3922 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3923 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3924 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3925 3926 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3927 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3928 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3930 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3931 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3932 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3933 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3935 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3936 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3937 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3938 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3939 3940 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3941 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3942 /* insert cols for primal dofs */ 3943 for (j=0;j<primal_dofs;j++) { 3944 start_vals = &qr_basis[j*size_of_constraint]; 3945 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3946 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3947 } 3948 /* insert cols for dual dofs */ 3949 for (j=0,k=0;j<dual_dofs;k++) { 3950 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3951 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3952 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3953 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3954 j++; 3955 } 3956 } 3957 3958 /* check change of basis */ 3959 if (pcbddc->dbg_flag) { 3960 PetscInt ii,jj; 3961 PetscBool valid_qr=PETSC_TRUE; 3962 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3963 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3967 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3969 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3971 for (jj=0;jj<size_of_constraint;jj++) { 3972 for (ii=0;ii<primal_dofs;ii++) { 3973 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3974 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3975 } 3976 } 3977 if (!valid_qr) { 3978 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3979 for (jj=0;jj<size_of_constraint;jj++) { 3980 for (ii=0;ii<primal_dofs;ii++) { 3981 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3982 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3983 } 3984 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3985 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3986 } 3987 } 3988 } 3989 } else { 3990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3991 } 3992 } 3993 } else { /* simple transformation block */ 3994 PetscInt row,col; 3995 PetscScalar val,norm; 3996 3997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3998 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3999 for (j=0;j<size_of_constraint;j++) { 4000 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4001 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4002 if (!PetscBTLookup(is_primal,row_B)) { 4003 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4004 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4005 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4006 } else { 4007 for (k=0;k<size_of_constraint;k++) { 4008 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4009 if (row != col) { 4010 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4011 } else { 4012 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4013 } 4014 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4015 } 4016 } 4017 } 4018 if (pcbddc->dbg_flag) { 4019 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4020 } 4021 } 4022 } else { 4023 if (pcbddc->dbg_flag) { 4024 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4025 } 4026 } 4027 } 4028 4029 /* free workspace */ 4030 if (qr_needed) { 4031 if (pcbddc->dbg_flag) { 4032 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4033 } 4034 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4035 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4036 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4037 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4038 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4039 } 4040 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4041 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4042 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 4044 /* assembling of global change of variable */ 4045 { 4046 Mat tmat; 4047 PetscInt bs; 4048 4049 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4050 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4051 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4052 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4053 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4054 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4055 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4056 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4057 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4058 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4059 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4060 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4061 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4062 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4063 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4064 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4065 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4066 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4067 } 4068 /* check */ 4069 if (pcbddc->dbg_flag) { 4070 PetscReal error; 4071 Vec x,x_change; 4072 4073 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4074 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4075 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4076 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4077 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4078 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4079 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4080 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4081 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4082 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4083 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4084 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4087 ierr = VecDestroy(&x);CHKERRQ(ierr); 4088 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4089 } 4090 4091 /* adapt sub_schurs computed (if any) */ 4092 if (pcbddc->use_deluxe_scaling) { 4093 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4094 if (sub_schurs->S_Ej_all) { 4095 Mat S_new,tmat; 4096 ISLocalToGlobalMapping NtoSall; 4097 IS is_all_N,is_V,is_V_Sall; 4098 const PetscScalar *array; 4099 const PetscInt *idxs_V,*idxs_all; 4100 PetscInt i,n_V; 4101 4102 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4103 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4104 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4105 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4106 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4107 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4108 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4109 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4110 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4111 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4112 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4113 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4114 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4115 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4116 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4117 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4118 for (i=0;i<n_V;i++) { 4119 PetscScalar val; 4120 PetscInt idx; 4121 4122 idx = idxs_V[i]; 4123 val = array[idxs_all[idxs_V[i]]]; 4124 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4125 } 4126 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4127 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4128 sub_schurs->S_Ej_all = S_new; 4129 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4130 if (sub_schurs->sum_S_Ej_all) { 4131 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4132 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4133 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4134 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4135 sub_schurs->sum_S_Ej_all = S_new; 4136 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4137 } 4138 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4139 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4140 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4141 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4142 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4143 } 4144 } 4145 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4146 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4147 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4148 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4149 } 4150 4151 /* set up change of basis context */ 4152 if (pcbddc->ChangeOfBasisMatrix) { 4153 PCBDDCChange_ctx change_ctx; 4154 4155 if (!pcbddc->new_global_mat) { 4156 PetscInt global_size,local_size; 4157 4158 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4159 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4160 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4161 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4162 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4163 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4164 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4165 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4166 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4167 } else { 4168 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4169 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4170 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4171 } 4172 if (!pcbddc->user_ChangeOfBasisMatrix) { 4173 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4174 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4175 } else { 4176 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4177 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4178 } 4179 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4180 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4181 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4182 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4183 } else { 4184 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4185 } 4186 4187 /* add pressure dofs to set of primal nodes for numbering purposes */ 4188 for (i=0;i<pcbddc->benign_n;i++) { 4189 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4190 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4191 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4192 pcbddc->local_primal_size_cc++; 4193 pcbddc->local_primal_size++; 4194 } 4195 4196 /* check if a new primal space has been introduced (also take into account benign trick) */ 4197 pcbddc->new_primal_space_local = PETSC_TRUE; 4198 if (olocal_primal_size == pcbddc->local_primal_size) { 4199 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4200 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4201 if (!pcbddc->new_primal_space_local) { 4202 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4203 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4204 } 4205 } 4206 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4207 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4208 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4209 4210 /* flush dbg viewer */ 4211 if (pcbddc->dbg_flag) { 4212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4213 } 4214 4215 /* free workspace */ 4216 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4217 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4218 if (!pcbddc->adaptive_selection) { 4219 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4220 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4221 } else { 4222 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4223 pcbddc->adaptive_constraints_idxs_ptr, 4224 pcbddc->adaptive_constraints_data_ptr, 4225 pcbddc->adaptive_constraints_idxs, 4226 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4227 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4228 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4229 } 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #undef __FUNCT__ 4234 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4235 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4236 { 4237 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4238 PC_IS *pcis = (PC_IS*)pc->data; 4239 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4240 PetscInt ierr,i,vertex_size,N; 4241 PetscViewer viewer=pcbddc->dbg_viewer; 4242 4243 PetscFunctionBegin; 4244 /* Reset previously computed graph */ 4245 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4246 /* Init local Graph struct */ 4247 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4248 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4249 4250 /* Check validity of the csr graph passed in by the user */ 4251 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4252 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4253 } 4254 4255 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4256 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4257 PetscInt *xadj,*adjncy; 4258 PetscInt nvtxs; 4259 PetscBool flg_row=PETSC_FALSE; 4260 4261 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4262 if (flg_row) { 4263 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4264 pcbddc->computed_rowadj = PETSC_TRUE; 4265 } 4266 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4267 } 4268 if (pcbddc->dbg_flag) { 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 } 4271 4272 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4273 vertex_size = 1; 4274 if (pcbddc->user_provided_isfordofs) { 4275 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4276 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4277 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4278 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4279 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4280 } 4281 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4282 pcbddc->n_ISForDofs = 0; 4283 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4284 } 4285 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4286 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4287 } else { 4288 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4289 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4290 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4291 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4292 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4293 } 4294 } 4295 } 4296 4297 /* Setup of Graph */ 4298 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4299 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4300 } 4301 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4302 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4303 } 4304 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4305 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4306 } 4307 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4308 4309 /* attach info on disconnected subdomains if present */ 4310 if (pcbddc->n_local_subs) { 4311 PetscInt *local_subs; 4312 4313 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4314 for (i=0;i<pcbddc->n_local_subs;i++) { 4315 const PetscInt *idxs; 4316 PetscInt nl,j; 4317 4318 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4319 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4320 for (j=0;j<nl;j++) { 4321 local_subs[idxs[j]] = i; 4322 } 4323 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4324 } 4325 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4326 pcbddc->mat_graph->local_subs = local_subs; 4327 } 4328 4329 /* Graph's connected components analysis */ 4330 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4331 4332 /* print some info to stdout */ 4333 if (pcbddc->dbg_flag) { 4334 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4335 } 4336 4337 /* mark topography has done */ 4338 pcbddc->recompute_topography = PETSC_FALSE; 4339 PetscFunctionReturn(0); 4340 } 4341 4342 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4343 #undef __FUNCT__ 4344 #define __FUNCT__ "PCBDDCSubsetNumbering" 4345 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4346 { 4347 PetscSF sf; 4348 PetscLayout map; 4349 const PetscInt *idxs; 4350 PetscInt *leaf_data,*root_data,*gidxs; 4351 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4352 PetscInt n_n,nlocals,start,first_index; 4353 PetscMPIInt commsize; 4354 PetscBool first_found; 4355 PetscErrorCode ierr; 4356 4357 PetscFunctionBegin; 4358 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4359 if (subset_mult) { 4360 PetscCheckSameComm(subset,1,subset_mult,2); 4361 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4362 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4363 } 4364 /* create workspace layout for computing global indices of subset */ 4365 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4366 lbounds[0] = lbounds[1] = 0; 4367 for (i=0;i<n;i++) { 4368 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4369 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4370 } 4371 lbounds[0] = -lbounds[0]; 4372 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4373 gbounds[0] = -gbounds[0]; 4374 N = gbounds[1] - gbounds[0] + 1; 4375 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4376 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4377 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4378 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4379 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4380 4381 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4382 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4383 if (subset_mult) { 4384 const PetscInt* idxs_mult; 4385 4386 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4387 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4388 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4389 } else { 4390 for (i=0;i<n;i++) leaf_data[i] = 1; 4391 } 4392 /* local size of new subset */ 4393 n_n = 0; 4394 for (i=0;i<n;i++) n_n += leaf_data[i]; 4395 4396 /* global indexes in layout */ 4397 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4398 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4399 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4400 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4401 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4402 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4403 4404 /* reduce from leaves to roots */ 4405 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4406 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4407 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4408 4409 /* count indexes in local part of layout */ 4410 nlocals = 0; 4411 first_index = -1; 4412 first_found = PETSC_FALSE; 4413 for (i=0;i<Nl;i++) { 4414 if (!first_found && root_data[i]) { 4415 first_found = PETSC_TRUE; 4416 first_index = i; 4417 } 4418 nlocals += root_data[i]; 4419 } 4420 4421 /* cumulative of number of indexes and size of subset without holes */ 4422 #if defined(PETSC_HAVE_MPI_EXSCAN) 4423 start = 0; 4424 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4425 #else 4426 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4427 start = start-nlocals; 4428 #endif 4429 4430 if (N_n) { /* compute total size of new subset if requested */ 4431 *N_n = start + nlocals; 4432 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4433 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4434 } 4435 4436 /* adapt root data with cumulative */ 4437 if (first_found) { 4438 PetscInt old_index; 4439 4440 root_data[first_index] += start; 4441 old_index = first_index; 4442 for (i=first_index+1;i<Nl;i++) { 4443 if (root_data[i]) { 4444 root_data[i] += root_data[old_index]; 4445 old_index = i; 4446 } 4447 } 4448 } 4449 4450 /* from roots to leaves */ 4451 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4452 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4453 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4454 4455 /* create new IS with global indexes without holes */ 4456 if (subset_mult) { 4457 const PetscInt* idxs_mult; 4458 PetscInt cum; 4459 4460 cum = 0; 4461 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4462 for (i=0;i<n;i++) { 4463 PetscInt j; 4464 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4465 } 4466 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4467 } else { 4468 for (i=0;i<n;i++) { 4469 gidxs[i] = leaf_data[i]-1; 4470 } 4471 } 4472 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4473 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4479 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4480 { 4481 PetscInt i,j; 4482 PetscScalar *alphas; 4483 PetscErrorCode ierr; 4484 4485 PetscFunctionBegin; 4486 /* this implements stabilized Gram-Schmidt */ 4487 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4488 for (i=0;i<n;i++) { 4489 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4490 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4491 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4492 } 4493 ierr = PetscFree(alphas);CHKERRQ(ierr); 4494 PetscFunctionReturn(0); 4495 } 4496 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "MatISGetSubassemblingPattern" 4499 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4500 { 4501 IS ranks_send_to; 4502 PetscInt n_neighs,*neighs,*n_shared,**shared; 4503 PetscMPIInt size,rank,color; 4504 PetscInt *xadj,*adjncy; 4505 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4506 PetscInt i,local_size,threshold=0; 4507 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4508 PetscSubcomm subcomm; 4509 PetscErrorCode ierr; 4510 4511 PetscFunctionBegin; 4512 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4513 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4514 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4515 4516 /* Get info on mapping */ 4517 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4518 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4519 4520 /* build local CSR graph of subdomains' connectivity */ 4521 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4522 xadj[0] = 0; 4523 xadj[1] = PetscMax(n_neighs-1,0); 4524 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4525 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4526 4527 if (threshold) { 4528 PetscInt xadj_count = 0; 4529 for (i=1;i<n_neighs;i++) { 4530 if (n_shared[i] > threshold) { 4531 adjncy[xadj_count] = neighs[i]; 4532 adjncy_wgt[xadj_count] = n_shared[i]; 4533 xadj_count++; 4534 } 4535 } 4536 xadj[1] = xadj_count; 4537 } else { 4538 if (xadj[1]) { 4539 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4540 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4541 } 4542 } 4543 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4544 if (use_square) { 4545 for (i=0;i<xadj[1];i++) { 4546 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4547 } 4548 } 4549 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4550 4551 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4552 4553 /* 4554 Restrict work on active processes only. 4555 */ 4556 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4557 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4558 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4559 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4560 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4561 if (color) { 4562 ierr = PetscFree(xadj);CHKERRQ(ierr); 4563 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4564 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4565 } else { 4566 Mat subdomain_adj; 4567 IS new_ranks,new_ranks_contig; 4568 MatPartitioning partitioner; 4569 PetscInt prank,rstart=0,rend=0; 4570 PetscInt *is_indices,*oldranks; 4571 PetscBool aggregate; 4572 4573 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4574 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4575 prank = rank; 4576 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4577 /* 4578 for (i=0;i<size;i++) { 4579 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4580 } 4581 */ 4582 for (i=0;i<xadj[1];i++) { 4583 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4584 } 4585 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4586 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4587 if (aggregate) { 4588 PetscInt lrows,row,ncols,*cols; 4589 PetscMPIInt nrank; 4590 PetscScalar *vals; 4591 4592 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4593 lrows = 0; 4594 if (nrank<redprocs) { 4595 lrows = size/redprocs; 4596 if (nrank<size%redprocs) lrows++; 4597 } 4598 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4599 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4600 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4601 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4602 row = nrank; 4603 ncols = xadj[1]-xadj[0]; 4604 cols = adjncy; 4605 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4606 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4607 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4608 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4610 ierr = PetscFree(xadj);CHKERRQ(ierr); 4611 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4612 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4613 ierr = PetscFree(vals);CHKERRQ(ierr); 4614 } else { 4615 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4616 } 4617 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4618 4619 /* Partition */ 4620 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4621 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4622 if (use_vwgt) { 4623 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4624 v_wgt[0] = local_size; 4625 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4626 } 4627 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4628 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4629 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4630 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4631 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4632 4633 /* renumber new_ranks to avoid "holes" in new set of processors */ 4634 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4635 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4636 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4637 if (!redprocs) { 4638 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4639 } else { 4640 PetscInt idxs[1]; 4641 PetscMPIInt tag; 4642 MPI_Request *reqs; 4643 4644 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4645 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4646 for (i=rstart;i<rend;i++) { 4647 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4648 } 4649 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4650 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4651 ierr = PetscFree(reqs);CHKERRQ(ierr); 4652 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4653 } 4654 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4655 /* clean up */ 4656 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4657 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4658 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4659 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4660 } 4661 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4662 4663 /* assemble parallel IS for sends */ 4664 i = 1; 4665 if (color) i=0; 4666 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4667 /* get back IS */ 4668 *is_sends = ranks_send_to; 4669 PetscFunctionReturn(0); 4670 } 4671 4672 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4673 4674 #undef __FUNCT__ 4675 #define __FUNCT__ "MatISSubassemble" 4676 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4677 { 4678 Mat local_mat; 4679 IS is_sends_internal; 4680 PetscInt rows,cols,new_local_rows; 4681 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4682 PetscBool ismatis,isdense,newisdense,destroy_mat; 4683 ISLocalToGlobalMapping l2gmap; 4684 PetscInt* l2gmap_indices; 4685 const PetscInt* is_indices; 4686 MatType new_local_type; 4687 /* buffers */ 4688 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4689 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4690 PetscInt *recv_buffer_idxs_local; 4691 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4692 /* MPI */ 4693 MPI_Comm comm,comm_n; 4694 PetscSubcomm subcomm; 4695 PetscMPIInt n_sends,n_recvs,commsize; 4696 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4697 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4698 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4699 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4700 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4701 PetscErrorCode ierr; 4702 4703 PetscFunctionBegin; 4704 /* TODO: add missing checks */ 4705 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4706 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4707 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4708 PetscValidLogicalCollectiveInt(mat,nis,7); 4709 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4710 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4711 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4712 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4713 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4714 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4715 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4716 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4717 PetscInt mrows,mcols,mnrows,mncols; 4718 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4719 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4720 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4721 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4722 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4723 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4724 } 4725 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4726 PetscValidLogicalCollectiveInt(mat,bs,0); 4727 /* prepare IS for sending if not provided */ 4728 if (!is_sends) { 4729 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4730 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4731 } else { 4732 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4733 is_sends_internal = is_sends; 4734 } 4735 4736 /* get comm */ 4737 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4738 4739 /* compute number of sends */ 4740 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4741 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4742 4743 /* compute number of receives */ 4744 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4746 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4747 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4748 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4749 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4750 ierr = PetscFree(iflags);CHKERRQ(ierr); 4751 4752 /* restrict comm if requested */ 4753 subcomm = 0; 4754 destroy_mat = PETSC_FALSE; 4755 if (restrict_comm) { 4756 PetscMPIInt color,subcommsize; 4757 4758 color = 0; 4759 if (restrict_full) { 4760 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4761 } else { 4762 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4763 } 4764 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4765 subcommsize = commsize - subcommsize; 4766 /* check if reuse has been requested */ 4767 if (reuse == MAT_REUSE_MATRIX) { 4768 if (*mat_n) { 4769 PetscMPIInt subcommsize2; 4770 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4771 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4772 comm_n = PetscObjectComm((PetscObject)*mat_n); 4773 } else { 4774 comm_n = PETSC_COMM_SELF; 4775 } 4776 } else { /* MAT_INITIAL_MATRIX */ 4777 PetscMPIInt rank; 4778 4779 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4780 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4781 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4782 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4783 comm_n = PetscSubcommChild(subcomm); 4784 } 4785 /* flag to destroy *mat_n if not significative */ 4786 if (color) destroy_mat = PETSC_TRUE; 4787 } else { 4788 comm_n = comm; 4789 } 4790 4791 /* prepare send/receive buffers */ 4792 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4793 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4794 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4795 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4796 if (nis) { 4797 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4798 } 4799 4800 /* Get data from local matrices */ 4801 if (!isdense) { 4802 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4803 /* TODO: See below some guidelines on how to prepare the local buffers */ 4804 /* 4805 send_buffer_vals should contain the raw values of the local matrix 4806 send_buffer_idxs should contain: 4807 - MatType_PRIVATE type 4808 - PetscInt size_of_l2gmap 4809 - PetscInt global_row_indices[size_of_l2gmap] 4810 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4811 */ 4812 } else { 4813 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4814 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4815 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4816 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4817 send_buffer_idxs[1] = i; 4818 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4819 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4820 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4821 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4822 for (i=0;i<n_sends;i++) { 4823 ilengths_vals[is_indices[i]] = len*len; 4824 ilengths_idxs[is_indices[i]] = len+2; 4825 } 4826 } 4827 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4828 /* additional is (if any) */ 4829 if (nis) { 4830 PetscMPIInt psum; 4831 PetscInt j; 4832 for (j=0,psum=0;j<nis;j++) { 4833 PetscInt plen; 4834 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4835 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4836 psum += len+1; /* indices + lenght */ 4837 } 4838 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4839 for (j=0,psum=0;j<nis;j++) { 4840 PetscInt plen; 4841 const PetscInt *is_array_idxs; 4842 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4843 send_buffer_idxs_is[psum] = plen; 4844 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4845 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4846 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4847 psum += plen+1; /* indices + lenght */ 4848 } 4849 for (i=0;i<n_sends;i++) { 4850 ilengths_idxs_is[is_indices[i]] = psum; 4851 } 4852 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4853 } 4854 4855 buf_size_idxs = 0; 4856 buf_size_vals = 0; 4857 buf_size_idxs_is = 0; 4858 for (i=0;i<n_recvs;i++) { 4859 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4860 buf_size_vals += (PetscInt)olengths_vals[i]; 4861 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4862 } 4863 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4864 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4865 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4866 4867 /* get new tags for clean communications */ 4868 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4869 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4870 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4871 4872 /* allocate for requests */ 4873 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4874 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4875 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4876 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4877 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4878 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4879 4880 /* communications */ 4881 ptr_idxs = recv_buffer_idxs; 4882 ptr_vals = recv_buffer_vals; 4883 ptr_idxs_is = recv_buffer_idxs_is; 4884 for (i=0;i<n_recvs;i++) { 4885 source_dest = onodes[i]; 4886 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4887 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4888 ptr_idxs += olengths_idxs[i]; 4889 ptr_vals += olengths_vals[i]; 4890 if (nis) { 4891 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4892 ptr_idxs_is += olengths_idxs_is[i]; 4893 } 4894 } 4895 for (i=0;i<n_sends;i++) { 4896 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4897 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4898 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4899 if (nis) { 4900 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4901 } 4902 } 4903 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4904 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4905 4906 /* assemble new l2g map */ 4907 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4908 ptr_idxs = recv_buffer_idxs; 4909 new_local_rows = 0; 4910 for (i=0;i<n_recvs;i++) { 4911 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4912 ptr_idxs += olengths_idxs[i]; 4913 } 4914 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4915 ptr_idxs = recv_buffer_idxs; 4916 new_local_rows = 0; 4917 for (i=0;i<n_recvs;i++) { 4918 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4919 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4920 ptr_idxs += olengths_idxs[i]; 4921 } 4922 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4923 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4924 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4925 4926 /* infer new local matrix type from received local matrices type */ 4927 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4928 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4929 if (n_recvs) { 4930 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4931 ptr_idxs = recv_buffer_idxs; 4932 for (i=0;i<n_recvs;i++) { 4933 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4934 new_local_type_private = MATAIJ_PRIVATE; 4935 break; 4936 } 4937 ptr_idxs += olengths_idxs[i]; 4938 } 4939 switch (new_local_type_private) { 4940 case MATDENSE_PRIVATE: 4941 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4942 new_local_type = MATSEQAIJ; 4943 bs = 1; 4944 } else { /* if I receive only 1 dense matrix */ 4945 new_local_type = MATSEQDENSE; 4946 bs = 1; 4947 } 4948 break; 4949 case MATAIJ_PRIVATE: 4950 new_local_type = MATSEQAIJ; 4951 bs = 1; 4952 break; 4953 case MATBAIJ_PRIVATE: 4954 new_local_type = MATSEQBAIJ; 4955 break; 4956 case MATSBAIJ_PRIVATE: 4957 new_local_type = MATSEQSBAIJ; 4958 break; 4959 default: 4960 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4961 break; 4962 } 4963 } else { /* by default, new_local_type is seqdense */ 4964 new_local_type = MATSEQDENSE; 4965 bs = 1; 4966 } 4967 4968 /* create MATIS object if needed */ 4969 if (reuse == MAT_INITIAL_MATRIX) { 4970 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4971 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4972 } else { 4973 /* it also destroys the local matrices */ 4974 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4975 } 4976 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4977 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4978 4979 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4980 4981 /* Global to local map of received indices */ 4982 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4983 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4984 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4985 4986 /* restore attributes -> type of incoming data and its size */ 4987 buf_size_idxs = 0; 4988 for (i=0;i<n_recvs;i++) { 4989 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4990 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4991 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4992 } 4993 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4994 4995 /* set preallocation */ 4996 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4997 if (!newisdense) { 4998 PetscInt *new_local_nnz=0; 4999 5000 ptr_vals = recv_buffer_vals; 5001 ptr_idxs = recv_buffer_idxs_local; 5002 if (n_recvs) { 5003 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5004 } 5005 for (i=0;i<n_recvs;i++) { 5006 PetscInt j; 5007 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5008 for (j=0;j<*(ptr_idxs+1);j++) { 5009 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5010 } 5011 } else { 5012 /* TODO */ 5013 } 5014 ptr_idxs += olengths_idxs[i]; 5015 } 5016 if (new_local_nnz) { 5017 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5018 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5019 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5020 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5021 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5022 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5023 } else { 5024 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5025 } 5026 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5027 } else { 5028 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5029 } 5030 5031 /* set values */ 5032 ptr_vals = recv_buffer_vals; 5033 ptr_idxs = recv_buffer_idxs_local; 5034 for (i=0;i<n_recvs;i++) { 5035 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5036 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5037 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5038 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5039 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5040 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5041 } else { 5042 /* TODO */ 5043 } 5044 ptr_idxs += olengths_idxs[i]; 5045 ptr_vals += olengths_vals[i]; 5046 } 5047 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5048 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5049 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5050 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5051 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5052 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5053 5054 #if 0 5055 if (!restrict_comm) { /* check */ 5056 Vec lvec,rvec; 5057 PetscReal infty_error; 5058 5059 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5060 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5061 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5062 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5063 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5064 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5065 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5066 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5067 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5068 } 5069 #endif 5070 5071 /* assemble new additional is (if any) */ 5072 if (nis) { 5073 PetscInt **temp_idxs,*count_is,j,psum; 5074 5075 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5076 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5077 ptr_idxs = recv_buffer_idxs_is; 5078 psum = 0; 5079 for (i=0;i<n_recvs;i++) { 5080 for (j=0;j<nis;j++) { 5081 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5082 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5083 psum += plen; 5084 ptr_idxs += plen+1; /* shift pointer to received data */ 5085 } 5086 } 5087 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5088 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5089 for (i=1;i<nis;i++) { 5090 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5091 } 5092 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5093 ptr_idxs = recv_buffer_idxs_is; 5094 for (i=0;i<n_recvs;i++) { 5095 for (j=0;j<nis;j++) { 5096 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5097 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5098 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5099 ptr_idxs += plen+1; /* shift pointer to received data */ 5100 } 5101 } 5102 for (i=0;i<nis;i++) { 5103 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5104 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5105 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5106 } 5107 ierr = PetscFree(count_is);CHKERRQ(ierr); 5108 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5109 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5110 } 5111 /* free workspace */ 5112 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5113 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5114 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5115 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5116 if (isdense) { 5117 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5118 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5119 } else { 5120 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5121 } 5122 if (nis) { 5123 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5124 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5125 } 5126 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5127 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5128 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5129 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5130 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5131 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5132 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5133 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5134 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5135 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5136 ierr = PetscFree(onodes);CHKERRQ(ierr); 5137 if (nis) { 5138 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5139 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5140 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5141 } 5142 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5143 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5144 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5145 for (i=0;i<nis;i++) { 5146 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5147 } 5148 *mat_n = NULL; 5149 } 5150 PetscFunctionReturn(0); 5151 } 5152 5153 /* temporary hack into ksp private data structure */ 5154 #include <petsc/private/kspimpl.h> 5155 5156 #undef __FUNCT__ 5157 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5158 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5159 { 5160 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5161 PC_IS *pcis = (PC_IS*)pc->data; 5162 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5163 MatNullSpace CoarseNullSpace=NULL; 5164 ISLocalToGlobalMapping coarse_islg; 5165 IS coarse_is,*isarray; 5166 PetscInt i,im_active=-1,active_procs=-1; 5167 PetscInt nis,nisdofs,nisneu,nisvert; 5168 PC pc_temp; 5169 PCType coarse_pc_type; 5170 KSPType coarse_ksp_type; 5171 PetscBool multilevel_requested,multilevel_allowed; 5172 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5173 Mat t_coarse_mat_is; 5174 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5175 PetscMPIInt all_procs; 5176 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5177 PetscBool compute_vecs = PETSC_FALSE; 5178 PetscScalar *array; 5179 PetscErrorCode ierr; 5180 5181 PetscFunctionBegin; 5182 /* Assign global numbering to coarse dofs */ 5183 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5184 PetscInt ocoarse_size; 5185 compute_vecs = PETSC_TRUE; 5186 ocoarse_size = pcbddc->coarse_size; 5187 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5188 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5189 /* see if we can avoid some work */ 5190 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5191 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5192 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5193 PC pc; 5194 PetscBool isbddc; 5195 5196 /* temporary workaround since PCBDDC does not have a reset method so far */ 5197 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5198 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5199 if (isbddc) { 5200 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5201 } else { 5202 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5203 } 5204 coarse_reuse = PETSC_FALSE; 5205 } else { /* we can safely reuse already computed coarse matrix */ 5206 coarse_reuse = PETSC_TRUE; 5207 } 5208 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5209 coarse_reuse = PETSC_FALSE; 5210 } 5211 /* reset any subassembling information */ 5212 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5213 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5214 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5215 coarse_reuse = PETSC_TRUE; 5216 } 5217 5218 /* count "active" (i.e. with positive local size) and "void" processes */ 5219 im_active = !!(pcis->n); 5220 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5221 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5222 void_procs = all_procs-active_procs; 5223 csin_type_simple = PETSC_TRUE; 5224 redist = PETSC_FALSE; 5225 if (pcbddc->current_level && void_procs) { 5226 csin_ml = PETSC_TRUE; 5227 ncoarse_ml = void_procs; 5228 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5229 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5230 csin_ds = PETSC_TRUE; 5231 ncoarse_ds = pcbddc->redistribute_coarse; 5232 redist = PETSC_TRUE; 5233 } else { 5234 csin_ds = PETSC_TRUE; 5235 ncoarse_ds = active_procs; 5236 redist = PETSC_TRUE; 5237 } 5238 } else { 5239 csin_ml = PETSC_FALSE; 5240 ncoarse_ml = all_procs; 5241 if (void_procs) { 5242 csin_ds = PETSC_TRUE; 5243 ncoarse_ds = void_procs; 5244 csin_type_simple = PETSC_FALSE; 5245 } else { 5246 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5247 csin_ds = PETSC_TRUE; 5248 ncoarse_ds = pcbddc->redistribute_coarse; 5249 redist = PETSC_TRUE; 5250 } else { 5251 csin_ds = PETSC_FALSE; 5252 ncoarse_ds = all_procs; 5253 } 5254 } 5255 } 5256 5257 /* 5258 test if we can go multilevel: three conditions must be satisfied: 5259 - we have not exceeded the number of levels requested 5260 - we can actually subassemble the active processes 5261 - we can find a suitable number of MPI processes where we can place the subassembled problem 5262 */ 5263 multilevel_allowed = PETSC_FALSE; 5264 multilevel_requested = PETSC_FALSE; 5265 if (pcbddc->current_level < pcbddc->max_levels) { 5266 multilevel_requested = PETSC_TRUE; 5267 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5268 multilevel_allowed = PETSC_FALSE; 5269 } else { 5270 multilevel_allowed = PETSC_TRUE; 5271 } 5272 } 5273 /* determine number of process partecipating to coarse solver */ 5274 if (multilevel_allowed) { 5275 ncoarse = ncoarse_ml; 5276 csin = csin_ml; 5277 redist = PETSC_FALSE; 5278 } else { 5279 ncoarse = ncoarse_ds; 5280 csin = csin_ds; 5281 } 5282 5283 /* creates temporary l2gmap and IS for coarse indexes */ 5284 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5285 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5286 5287 /* creates temporary MATIS object for coarse matrix */ 5288 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5289 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5290 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5291 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5292 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5293 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5294 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5295 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5296 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5297 5298 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5299 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5300 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5301 const PetscInt *idxs; 5302 ISLocalToGlobalMapping tmap; 5303 5304 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5305 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5306 /* allocate space for temporary storage */ 5307 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5308 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5309 /* allocate for IS array */ 5310 nisdofs = pcbddc->n_ISForDofsLocal; 5311 nisneu = !!pcbddc->NeumannBoundariesLocal; 5312 nisvert = 0; 5313 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5314 nisvert = 1; 5315 } 5316 nis = nisdofs + nisneu + nisvert; 5317 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5318 /* dofs splitting */ 5319 for (i=0;i<nisdofs;i++) { 5320 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5321 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5322 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5323 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5324 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5325 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5327 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5328 } 5329 /* neumann boundaries */ 5330 if (pcbddc->NeumannBoundariesLocal) { 5331 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5332 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5333 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5334 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5335 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5336 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5337 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5338 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5339 } 5340 /* primal vertices (benign) */ 5341 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5342 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5343 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5344 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5345 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5346 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5347 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5348 } 5349 /* free memory */ 5350 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5351 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5352 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5353 } else { 5354 nis = 0; 5355 nisdofs = 0; 5356 nisneu = 0; 5357 nisvert = 0; 5358 isarray = NULL; 5359 } 5360 /* destroy no longer needed map */ 5361 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5362 5363 /* restrict on coarse candidates (if needed) */ 5364 coarse_mat_is = NULL; 5365 if (csin) { 5366 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5367 if (redist) { 5368 PetscMPIInt rank; 5369 PetscInt spc,n_spc_p1,dest[1],destsize; 5370 5371 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5372 spc = active_procs/ncoarse; 5373 n_spc_p1 = active_procs%ncoarse; 5374 if (im_active) { 5375 destsize = 1; 5376 if (rank > n_spc_p1*(spc+1)-1) { 5377 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5378 } else { 5379 dest[0] = rank/(spc+1); 5380 } 5381 } else { 5382 destsize = 0; 5383 } 5384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5385 } else if (csin_type_simple) { 5386 PetscMPIInt rank; 5387 PetscInt issize,isidx; 5388 5389 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5390 if (im_active) { 5391 issize = 1; 5392 isidx = (PetscInt)rank; 5393 } else { 5394 issize = 0; 5395 isidx = -1; 5396 } 5397 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5398 } else { /* get a suitable subassembling pattern from MATIS code */ 5399 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5400 } 5401 5402 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5403 if (!redist || ncoarse <= void_procs) { 5404 PetscInt ncoarse_cand,tissize,*nisindices; 5405 PetscInt *coarse_candidates; 5406 const PetscInt* tisindices; 5407 5408 /* get coarse candidates' ranks in pc communicator */ 5409 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5410 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5411 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5412 if (!coarse_candidates[i]) { 5413 coarse_candidates[ncoarse_cand++]=i; 5414 } 5415 } 5416 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5417 5418 5419 if (pcbddc->dbg_flag) { 5420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5422 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5423 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5424 for (i=0;i<ncoarse_cand;i++) { 5425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5426 } 5427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5429 } 5430 /* shift the pattern on coarse candidates */ 5431 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5432 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5433 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5434 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5435 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5436 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5437 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5438 } 5439 if (pcbddc->dbg_flag) { 5440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5442 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5444 } 5445 } 5446 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5447 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5448 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5449 } else { /* this is the last level, so use just receiving processes in subcomm */ 5450 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5451 } 5452 } else { 5453 if (pcbddc->dbg_flag) { 5454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5456 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5457 } 5458 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5459 coarse_mat_is = t_coarse_mat_is; 5460 } 5461 5462 /* create local to global scatters for coarse problem */ 5463 if (compute_vecs) { 5464 PetscInt lrows; 5465 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5466 if (coarse_mat_is) { 5467 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5468 } else { 5469 lrows = 0; 5470 } 5471 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5472 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5473 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5474 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5475 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5476 } 5477 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5478 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5479 5480 /* set defaults for coarse KSP and PC */ 5481 if (multilevel_allowed) { 5482 coarse_ksp_type = KSPRICHARDSON; 5483 coarse_pc_type = PCBDDC; 5484 } else { 5485 coarse_ksp_type = KSPPREONLY; 5486 coarse_pc_type = PCREDUNDANT; 5487 } 5488 5489 /* print some info if requested */ 5490 if (pcbddc->dbg_flag) { 5491 if (!multilevel_allowed) { 5492 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5493 if (multilevel_requested) { 5494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5495 } else if (pcbddc->max_levels) { 5496 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5497 } 5498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5499 } 5500 } 5501 5502 /* create the coarse KSP object only once with defaults */ 5503 if (coarse_mat_is) { 5504 MatReuse coarse_mat_reuse; 5505 PetscViewer dbg_viewer = NULL; 5506 if (pcbddc->dbg_flag) { 5507 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5508 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5509 } 5510 if (!pcbddc->coarse_ksp) { 5511 char prefix[256],str_level[16]; 5512 size_t len; 5513 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5514 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5515 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5516 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5517 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5518 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5519 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5520 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5521 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5522 /* prefix */ 5523 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5524 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5525 if (!pcbddc->current_level) { 5526 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5527 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5528 } else { 5529 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5530 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5531 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5532 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5533 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5534 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5535 } 5536 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5537 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5538 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5539 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5540 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5541 /* allow user customization */ 5542 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5543 } 5544 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5545 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5546 if (nisdofs) { 5547 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5548 for (i=0;i<nisdofs;i++) { 5549 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5550 } 5551 } 5552 if (nisneu) { 5553 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5554 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5555 } 5556 if (nisvert) { 5557 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5558 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5559 } 5560 5561 /* get some info after set from options */ 5562 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5563 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5564 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5565 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5566 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5567 isbddc = PETSC_FALSE; 5568 } 5569 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5570 if (isredundant) { 5571 KSP inner_ksp; 5572 PC inner_pc; 5573 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5574 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5575 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5576 } 5577 5578 /* assemble coarse matrix */ 5579 if (coarse_reuse) { 5580 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5581 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5582 coarse_mat_reuse = MAT_REUSE_MATRIX; 5583 } else { 5584 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5585 } 5586 if (isbddc || isnn) { 5587 if (isbddc) { /* currently there's no API for this */ 5588 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5589 pcbddc->detect_disconnected = PETSC_TRUE; 5590 } 5591 if (pcbddc->coarsening_ratio > 1) { 5592 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5593 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5594 if (pcbddc->dbg_flag) { 5595 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5596 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5597 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5598 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5599 } 5600 } 5601 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5602 } else { 5603 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5604 coarse_mat = coarse_mat_is; 5605 } 5606 } else { 5607 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5608 } 5609 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5610 5611 /* propagate symmetry info of coarse matrix */ 5612 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5613 if (pc->pmat->symmetric_set) { 5614 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5615 } 5616 if (pc->pmat->hermitian_set) { 5617 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5618 } 5619 if (pc->pmat->spd_set) { 5620 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5621 } 5622 /* set operators */ 5623 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5624 if (pcbddc->dbg_flag) { 5625 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5626 } 5627 } else { /* processes non partecipating to coarse solver (if any) */ 5628 coarse_mat = 0; 5629 } 5630 ierr = PetscFree(isarray);CHKERRQ(ierr); 5631 #if 0 5632 { 5633 PetscViewer viewer; 5634 char filename[256]; 5635 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5636 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5637 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5638 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5639 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5640 } 5641 #endif 5642 5643 /* Compute coarse null space (special handling by BDDC only) */ 5644 #if 0 5645 if (pcbddc->NullSpace) { 5646 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5647 } 5648 #endif 5649 /* hack */ 5650 if (pcbddc->coarse_ksp) { 5651 Vec crhs,csol; 5652 5653 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5654 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5655 if (!csol) { 5656 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5657 } 5658 if (!crhs) { 5659 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5660 } 5661 } 5662 5663 /* compute null space for coarse solver if the benign trick has been requested */ 5664 if (pcbddc->benign_null) { 5665 5666 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5667 for (i=0;i<pcbddc->benign_n;i++) { 5668 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5669 } 5670 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5671 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5672 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5674 if (coarse_mat) { 5675 Vec nullv; 5676 PetscScalar *array,*array2; 5677 PetscInt nl; 5678 5679 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5680 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5681 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5682 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5683 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5684 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5685 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5686 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5687 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5688 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5689 } 5690 } 5691 5692 if (pcbddc->coarse_ksp) { 5693 PetscBool ispreonly; 5694 5695 if (CoarseNullSpace) { 5696 PetscBool isnull; 5697 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5698 if (0) { 5699 if (isbddc && !pcbddc->benign_saddle_point) { 5700 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5701 } else { 5702 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5703 } 5704 } else { 5705 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5706 } 5707 } 5708 /* setup coarse ksp */ 5709 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5710 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5711 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5712 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5713 KSP check_ksp; 5714 KSPType check_ksp_type; 5715 PC check_pc; 5716 Vec check_vec,coarse_vec; 5717 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5718 PetscInt its; 5719 PetscBool compute_eigs; 5720 PetscReal *eigs_r,*eigs_c; 5721 PetscInt neigs; 5722 const char *prefix; 5723 5724 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5725 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5726 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5727 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5728 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5729 if (ispreonly) { 5730 check_ksp_type = KSPPREONLY; 5731 compute_eigs = PETSC_FALSE; 5732 } else { 5733 check_ksp_type = KSPGMRES; 5734 compute_eigs = PETSC_TRUE; 5735 } 5736 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5737 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5738 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5739 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5740 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5741 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5742 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5743 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5744 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5745 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5746 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5747 /* create random vec */ 5748 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5749 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5750 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5751 if (CoarseNullSpace) { 5752 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5753 } 5754 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5755 /* solve coarse problem */ 5756 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5757 if (CoarseNullSpace) { 5758 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5759 } 5760 /* set eigenvalue estimation if preonly has not been requested */ 5761 if (compute_eigs) { 5762 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5763 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5764 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5765 lambda_max = eigs_r[neigs-1]; 5766 lambda_min = eigs_r[0]; 5767 if (pcbddc->use_coarse_estimates) { 5768 if (lambda_max>lambda_min) { 5769 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5770 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5771 } 5772 } 5773 } 5774 5775 /* check coarse problem residual error */ 5776 if (pcbddc->dbg_flag) { 5777 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5778 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5779 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5780 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5781 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5782 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5783 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5784 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5785 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5786 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5787 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5788 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5789 if (CoarseNullSpace) { 5790 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5791 } 5792 if (compute_eigs) { 5793 PetscReal lambda_max_s,lambda_min_s; 5794 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5795 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5796 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5797 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5798 for (i=0;i<neigs;i++) { 5799 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5800 } 5801 } 5802 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5803 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5804 } 5805 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5806 if (compute_eigs) { 5807 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5808 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5809 } 5810 } 5811 } 5812 /* print additional info */ 5813 if (pcbddc->dbg_flag) { 5814 /* waits until all processes reaches this point */ 5815 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5816 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5818 } 5819 5820 /* free memory */ 5821 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5822 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5823 PetscFunctionReturn(0); 5824 } 5825 5826 #undef __FUNCT__ 5827 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5828 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5829 { 5830 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5831 PC_IS* pcis = (PC_IS*)pc->data; 5832 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5833 IS subset,subset_mult,subset_n; 5834 PetscInt local_size,coarse_size=0; 5835 PetscInt *local_primal_indices=NULL; 5836 const PetscInt *t_local_primal_indices; 5837 PetscErrorCode ierr; 5838 5839 PetscFunctionBegin; 5840 /* Compute global number of coarse dofs */ 5841 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5842 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5843 } 5844 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5845 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5846 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5847 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5848 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5849 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5850 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5851 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5852 if (local_size != pcbddc->local_primal_size) { 5853 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5854 } 5855 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5856 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5857 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5858 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5859 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5860 5861 /* check numbering */ 5862 if (pcbddc->dbg_flag) { 5863 PetscScalar coarsesum,*array,*array2; 5864 PetscInt i; 5865 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5866 5867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5870 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5871 /* counter */ 5872 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5873 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5874 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5875 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5876 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5877 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5878 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5879 for (i=0;i<pcbddc->local_primal_size;i++) { 5880 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5881 } 5882 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5883 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5884 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5885 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5886 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5887 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5888 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5889 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5890 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5891 for (i=0;i<pcis->n;i++) { 5892 if (array[i] != 0.0 && array[i] != array2[i]) { 5893 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5894 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5895 set_error = PETSC_TRUE; 5896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5897 } 5898 } 5899 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5900 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5902 for (i=0;i<pcis->n;i++) { 5903 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5904 } 5905 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5906 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5907 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5908 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5909 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5910 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5911 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5912 PetscInt *gidxs; 5913 5914 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5915 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5917 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5918 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5919 for (i=0;i<pcbddc->local_primal_size;i++) { 5920 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5921 } 5922 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5923 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5924 } 5925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5926 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5927 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5928 } 5929 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5930 /* get back data */ 5931 *coarse_size_n = coarse_size; 5932 *local_primal_indices_n = local_primal_indices; 5933 PetscFunctionReturn(0); 5934 } 5935 5936 #undef __FUNCT__ 5937 #define __FUNCT__ "PCBDDCGlobalToLocal" 5938 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5939 { 5940 IS localis_t; 5941 PetscInt i,lsize,*idxs,n; 5942 PetscScalar *vals; 5943 PetscErrorCode ierr; 5944 5945 PetscFunctionBegin; 5946 /* get indices in local ordering exploiting local to global map */ 5947 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5948 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5949 for (i=0;i<lsize;i++) vals[i] = 1.0; 5950 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5951 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5952 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5953 if (idxs) { /* multilevel guard */ 5954 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5955 } 5956 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5957 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5958 ierr = PetscFree(vals);CHKERRQ(ierr); 5959 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5960 /* now compute set in local ordering */ 5961 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5962 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5963 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5964 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5965 for (i=0,lsize=0;i<n;i++) { 5966 if (PetscRealPart(vals[i]) > 0.5) { 5967 lsize++; 5968 } 5969 } 5970 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5971 for (i=0,lsize=0;i<n;i++) { 5972 if (PetscRealPart(vals[i]) > 0.5) { 5973 idxs[lsize++] = i; 5974 } 5975 } 5976 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5977 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5978 *localis = localis_t; 5979 PetscFunctionReturn(0); 5980 } 5981 5982 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5983 #undef __FUNCT__ 5984 #define __FUNCT__ "PCBDDCMatMult_Private" 5985 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5986 { 5987 PCBDDCChange_ctx change_ctx; 5988 PetscErrorCode ierr; 5989 5990 PetscFunctionBegin; 5991 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5992 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5993 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5994 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5995 PetscFunctionReturn(0); 5996 } 5997 5998 #undef __FUNCT__ 5999 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6000 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6001 { 6002 PCBDDCChange_ctx change_ctx; 6003 PetscErrorCode ierr; 6004 6005 PetscFunctionBegin; 6006 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6007 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6008 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6009 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6010 PetscFunctionReturn(0); 6011 } 6012 6013 #undef __FUNCT__ 6014 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6015 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6016 { 6017 PC_IS *pcis=(PC_IS*)pc->data; 6018 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6019 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6020 Mat S_j; 6021 PetscInt *used_xadj,*used_adjncy; 6022 PetscBool free_used_adj; 6023 PetscErrorCode ierr; 6024 6025 PetscFunctionBegin; 6026 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6027 free_used_adj = PETSC_FALSE; 6028 if (pcbddc->sub_schurs_layers == -1) { 6029 used_xadj = NULL; 6030 used_adjncy = NULL; 6031 } else { 6032 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6033 used_xadj = pcbddc->mat_graph->xadj; 6034 used_adjncy = pcbddc->mat_graph->adjncy; 6035 } else if (pcbddc->computed_rowadj) { 6036 used_xadj = pcbddc->mat_graph->xadj; 6037 used_adjncy = pcbddc->mat_graph->adjncy; 6038 } else { 6039 PetscBool flg_row=PETSC_FALSE; 6040 const PetscInt *xadj,*adjncy; 6041 PetscInt nvtxs; 6042 6043 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6044 if (flg_row) { 6045 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6046 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6047 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6048 free_used_adj = PETSC_TRUE; 6049 } else { 6050 pcbddc->sub_schurs_layers = -1; 6051 used_xadj = NULL; 6052 used_adjncy = NULL; 6053 } 6054 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6055 } 6056 } 6057 6058 /* setup sub_schurs data */ 6059 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6060 if (!sub_schurs->schur_explicit) { 6061 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6062 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6063 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6064 } else { 6065 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6066 PetscBool isseqaij; 6067 PetscInt benign_n; 6068 6069 if (!pcbddc->use_vertices && reuse_solvers) { 6070 PetscInt n_vertices; 6071 6072 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6073 reuse_solvers = (PetscBool)!n_vertices; 6074 } 6075 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6076 if (!isseqaij) { 6077 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6078 if (matis->A == pcbddc->local_mat) { 6079 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6080 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6081 } else { 6082 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6083 } 6084 } 6085 if (!pcbddc->benign_change_explicit) { 6086 benign_n = pcbddc->benign_n; 6087 } else { 6088 benign_n = 0; 6089 } 6090 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6091 } 6092 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6093 6094 /* free adjacency */ 6095 if (free_used_adj) { 6096 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6097 } 6098 PetscFunctionReturn(0); 6099 } 6100 6101 #undef __FUNCT__ 6102 #define __FUNCT__ "PCBDDCInitSubSchurs" 6103 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6104 { 6105 PC_IS *pcis=(PC_IS*)pc->data; 6106 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6107 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6108 PCBDDCGraph graph; 6109 PetscErrorCode ierr; 6110 6111 PetscFunctionBegin; 6112 /* attach interface graph for determining subsets */ 6113 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6114 IS verticesIS,verticescomm; 6115 PetscInt vsize,*idxs; 6116 6117 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6118 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6119 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6121 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6122 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6123 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6124 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6125 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6126 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6127 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6128 /* 6129 if (pcbddc->dbg_flag) { 6130 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6131 } 6132 */ 6133 } else { 6134 graph = pcbddc->mat_graph; 6135 } 6136 6137 /* sub_schurs init */ 6138 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6139 6140 /* free graph struct */ 6141 if (pcbddc->sub_schurs_rebuild) { 6142 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6143 } 6144 PetscFunctionReturn(0); 6145 } 6146 6147 #undef __FUNCT__ 6148 #define __FUNCT__ "PCBDDCCheckOperator" 6149 PetscErrorCode PCBDDCCheckOperator(PC pc) 6150 { 6151 PC_IS *pcis=(PC_IS*)pc->data; 6152 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6153 PetscErrorCode ierr; 6154 6155 PetscFunctionBegin; 6156 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6157 IS zerodiag = NULL; 6158 Mat S_j,B0_B=NULL; 6159 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6160 PetscScalar *p0_check,*array,*array2; 6161 PetscReal norm; 6162 PetscInt i; 6163 6164 /* B0 and B0_B */ 6165 if (zerodiag) { 6166 IS dummy; 6167 6168 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6169 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6170 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6171 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6172 } 6173 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6174 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6175 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6176 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6177 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6178 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6179 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6180 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6181 /* S_j */ 6182 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6183 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6184 6185 /* mimic vector in \widetilde{W}_\Gamma */ 6186 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6187 /* continuous in primal space */ 6188 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6189 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6190 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6191 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6192 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6193 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6194 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6195 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6198 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6199 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6200 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6201 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6202 6203 /* assemble rhs for coarse problem */ 6204 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6205 /* local with Schur */ 6206 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6207 if (zerodiag) { 6208 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6209 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6210 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6211 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6212 } 6213 /* sum on primal nodes the local contributions */ 6214 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6215 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6216 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6217 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6218 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6219 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6220 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6221 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6222 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6223 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6224 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6225 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6226 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6227 /* scale primal nodes (BDDC sums contibutions) */ 6228 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6229 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6230 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6231 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6232 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6233 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6234 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6235 /* global: \widetilde{B0}_B w_\Gamma */ 6236 if (zerodiag) { 6237 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6238 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6239 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6240 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6241 } 6242 /* BDDC */ 6243 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6244 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6245 6246 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6247 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6248 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6249 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6250 for (i=0;i<pcbddc->benign_n;i++) { 6251 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6252 } 6253 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6254 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6255 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6256 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6257 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6258 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6259 } 6260 PetscFunctionReturn(0); 6261 } 6262