1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 if (!A->rmap->N || !A->cmap->N) { 290 *ncc = 0; 291 *cc = NULL; 292 PetscFunctionReturn(0); 293 } 294 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 295 if (!isseqaij && filter) { 296 PetscBool isseqdense; 297 298 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 299 if (!isseqdense) { 300 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 301 } else { /* TODO: rectangular case and LDA */ 302 PetscScalar *array; 303 PetscReal chop=1.e-6; 304 305 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 306 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 307 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 308 for (i=0;i<n;i++) { 309 PetscInt j; 310 for (j=i+1;j<n;j++) { 311 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 312 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 313 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 314 } 315 } 316 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 317 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 318 } 319 } else { 320 B = A; 321 } 322 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 323 324 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 325 if (filter) { 326 PetscScalar *data; 327 PetscInt j,cum; 328 329 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 330 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 331 cum = 0; 332 for (i=0;i<n;i++) { 333 PetscInt t; 334 335 for (j=xadj[i];j<xadj[i+1];j++) { 336 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 337 continue; 338 } 339 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 340 } 341 t = xadj_filtered[i]; 342 xadj_filtered[i] = cum; 343 cum += t; 344 } 345 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 346 } else { 347 xadj_filtered = NULL; 348 adjncy_filtered = NULL; 349 } 350 351 /* compute local connected components using PCBDDCGraph */ 352 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 353 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 354 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 355 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 356 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 358 if (xadj_filtered) { 359 graph->xadj = xadj_filtered; 360 graph->adjncy = adjncy_filtered; 361 } else { 362 graph->xadj = xadj; 363 graph->adjncy = adjncy; 364 } 365 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 367 /* partial clean up */ 368 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 369 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 370 if (A != B) { 371 ierr = MatDestroy(&B);CHKERRQ(ierr); 372 } 373 374 /* get back data */ 375 if (ncc) *ncc = graph->ncc; 376 if (cc) { 377 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 378 for (i=0;i<graph->ncc;i++) { 379 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 380 } 381 *cc = cc_n; 382 } 383 /* clean up graph */ 384 graph->xadj = 0; 385 graph->adjncy = 0; 386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "PCBDDCBenignCheck" 392 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 393 { 394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 395 PC_IS* pcis = (PC_IS*)(pc->data); 396 IS dirIS = NULL; 397 PetscInt i; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 402 if (zerodiag) { 403 Mat A; 404 Vec vec3_N; 405 PetscScalar *vals; 406 const PetscInt *idxs; 407 PetscInt nz,*count; 408 409 /* p0 */ 410 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 411 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 412 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 413 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 414 for (i=0;i<nz;i++) vals[i] = 1.; 415 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* v_I */ 419 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 420 for (i=0;i<nz;i++) vals[i] = 0.; 421 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 423 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 424 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 425 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 427 if (dirIS) { 428 PetscInt n; 429 430 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 431 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 432 for (i=0;i<n;i++) vals[i] = 0.; 433 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 438 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 439 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 440 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 441 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 442 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 443 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 444 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 445 } 446 ierr = PetscFree(vals);CHKERRQ(ierr); 447 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 448 449 /* there should not be any pressure dofs lying on the interface */ 450 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 451 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 453 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 454 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 455 for (i=0;i<nz;i++) { 456 if (count[idxs[i]]) { 457 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 458 } 459 } 460 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 461 ierr = PetscFree(count);CHKERRQ(ierr); 462 } 463 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 464 465 /* check PCBDDCBenignGetOrSetP0 */ 466 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 470 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 471 for (i=0;i<pcbddc->benign_n;i++) { 472 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 473 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 474 } 475 } 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 481 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 482 { 483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 484 IS pressures,zerodiag,*zerodiag_subs; 485 PetscInt nz,n; 486 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 492 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 493 for (n=0;n<pcbddc->benign_n;n++) { 494 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 495 } 496 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 497 pcbddc->benign_n = 0; 498 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 499 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 500 Checks if all the pressure dofs in each subdomain have a zero diagonal 501 If not, a change of basis on pressures is not needed 502 since the local Schur complements are SPD 503 */ 504 has_null_pressures = PETSC_TRUE; 505 have_null = PETSC_TRUE; 506 if (pcbddc->n_ISForDofsLocal) { 507 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 508 509 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 510 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 511 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 514 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 515 if (!sorted) { 516 ierr = ISSort(pressures);CHKERRQ(ierr); 517 } 518 } else { 519 pressures = NULL; 520 } 521 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 522 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 523 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 524 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 525 if (!sorted) { 526 ierr = ISSort(zerodiag);CHKERRQ(ierr); 527 } 528 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 529 if (!nz) { 530 if (n) have_null = PETSC_FALSE; 531 has_null_pressures = PETSC_FALSE; 532 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 533 } 534 recompute_zerodiag = PETSC_FALSE; 535 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 536 zerodiag_subs = NULL; 537 pcbddc->benign_n = 0; 538 if (has_null_pressures) { 539 IS *subs; 540 PetscInt nsubs,i; 541 542 subs = pcbddc->local_subs; 543 nsubs = pcbddc->n_local_subs; 544 if (nsubs > 1) { 545 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 546 for (i=0;i<nsubs;i++) { 547 ISLocalToGlobalMapping l2g; 548 IS t_zerodiag_subs; 549 PetscInt nl; 550 551 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 552 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 553 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 554 if (nl) { 555 PetscBool valid = PETSC_TRUE; 556 557 if (pressures) { 558 IS t_pressure_subs; 559 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 560 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 561 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 562 } 563 if (valid) { 564 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 565 pcbddc->benign_n++; 566 } else { 567 recompute_zerodiag = PETSC_TRUE; 568 } 569 } 570 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 571 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 572 } 573 } else { /* there's just one subdomain (or zero if they have not been detected */ 574 PetscBool valid = PETSC_TRUE; 575 if (pressures) { 576 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 577 } 578 if (valid) { 579 pcbddc->benign_n = 1; 580 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 581 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 582 zerodiag_subs[0] = zerodiag; 583 } 584 } 585 } 586 587 if (!pcbddc->benign_n) { 588 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 589 recompute_zerodiag = PETSC_FALSE; 590 has_null_pressures = PETSC_FALSE; 591 have_null = PETSC_FALSE; 592 } 593 594 /* final check for null pressures */ 595 if (zerodiag && pressures) { 596 PetscInt nz,np; 597 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 598 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 599 if (nz != np) have_null = PETSC_FALSE; 600 } 601 602 if (recompute_zerodiag) { 603 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 604 if (pcbddc->benign_n == 1) { 605 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 606 zerodiag = zerodiag_subs[0]; 607 } else { 608 PetscInt i,nzn,*new_idxs; 609 610 nzn = 0; 611 for (i=0;i<pcbddc->benign_n;i++) { 612 PetscInt ns; 613 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 614 nzn += ns; 615 } 616 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 617 nzn = 0; 618 for (i=0;i<pcbddc->benign_n;i++) { 619 PetscInt ns,*idxs; 620 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 622 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 623 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 624 nzn += ns; 625 } 626 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 627 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 628 } 629 have_null = PETSC_FALSE; 630 } 631 632 if (has_null_pressures) { 633 IS zerodiagc; 634 const PetscInt *idxs,*idxsc; 635 PetscInt i,s,*nnz; 636 637 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 638 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 639 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 640 /* local change of basis for pressures */ 641 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 642 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 643 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 644 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 645 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 646 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 647 for (i=0;i<pcbddc->benign_n;i++) { 648 PetscInt nzs,j; 649 650 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 651 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 652 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 653 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 654 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 655 } 656 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 657 ierr = PetscFree(nnz);CHKERRQ(ierr); 658 /* set identity on velocities */ 659 for (i=0;i<n-nz;i++) { 660 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 661 } 662 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 663 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 664 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 665 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 666 /* set change on pressures */ 667 for (s=0;s<pcbddc->benign_n;s++) { 668 PetscScalar *array; 669 PetscInt nzs; 670 671 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 672 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 673 for (i=0;i<nzs-1;i++) { 674 PetscScalar vals[2]; 675 PetscInt cols[2]; 676 677 cols[0] = idxs[i]; 678 cols[1] = idxs[nzs-1]; 679 vals[0] = 1.; 680 vals[1] = 1.; 681 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 682 } 683 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 684 for (i=0;i<nzs-1;i++) array[i] = -1.; 685 array[nzs-1] = 1.; 686 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 687 /* store local idxs for p0 */ 688 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 689 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 ierr = PetscFree(array);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 /* project if needed */ 695 if (pcbddc->benign_change_explicit) { 696 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 700 } 701 /* store global idxs for p0 */ 702 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 703 } 704 pcbddc->benign_zerodiag_subs = zerodiag_subs; 705 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 706 707 /* determines if the coarse solver will be singular or not */ 708 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 709 *zerodiaglocal = zerodiag; 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 715 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 716 { 717 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (!pcbddc->benign_sf) { 722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 723 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 724 } 725 if (get) { /* use SF to get values */ 726 PetscScalar *array; 727 728 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 729 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 730 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 731 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 732 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 733 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 734 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 735 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 742 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 743 { 744 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 /* TODO: add error checking 749 - avoid nested pop (or push) calls. 750 - cannot push before pop. 751 - cannot call this if pcbddc->local_mat is NULL 752 */ 753 if (!pcbddc->benign_n) { 754 PetscFunctionReturn(0); 755 } 756 if (pop) { 757 if (pcbddc->benign_change_explicit) { 758 IS is_p0; 759 MatReuse reuse; 760 761 /* extract B_0 */ 762 reuse = MAT_INITIAL_MATRIX; 763 if (pcbddc->benign_B0) { 764 reuse = MAT_REUSE_MATRIX; 765 } 766 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 767 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 768 /* remove rows and cols from local problem */ 769 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 770 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 771 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 772 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 773 } else { 774 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 775 PetscScalar *vals; 776 PetscInt i,n,*idxs_ins; 777 778 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 779 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 780 if (!pcbddc->benign_B0) { 781 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 782 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 783 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 784 if (!pcbddc->current_level) { 785 /* this matrix is actually sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 786 Setting nnz=100 should be more than enough for the finest level */ 787 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 788 } else { 789 PetscInt *nnz; 790 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 791 for (i=0;i<pcbddc->benign_n;i++) { 792 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 793 nnz[i] = n - nnz[i]; 794 } 795 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 796 ierr = PetscFree(nnz);CHKERRQ(ierr); 797 } 798 } 799 800 for (i=0;i<pcbddc->benign_n;i++) { 801 PetscScalar *array; 802 PetscInt *idxs,j,nz,cum; 803 804 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 805 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 806 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 807 for (j=0;j<nz;j++) vals[j] = 1.; 808 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 811 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 812 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 813 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 814 cum = 0; 815 for (j=0;j<n;j++) { 816 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 817 vals[cum] = array[j]; 818 idxs_ins[cum] = j; 819 cum++; 820 } 821 } 822 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 824 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 825 } 826 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 829 } 830 } else { /* push */ 831 if (pcbddc->benign_change_explicit) { 832 PetscInt i; 833 834 for (i=0;i<pcbddc->benign_n;i++) { 835 PetscScalar *B0_vals; 836 PetscInt *B0_cols,B0_ncol; 837 838 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 839 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 840 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 841 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 842 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 843 } 844 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 845 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 } else { 847 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 848 } 849 } 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "PCBDDCAdaptiveSelection" 855 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 856 { 857 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 858 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 859 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 860 PetscBLASInt *B_iwork,*B_ifail; 861 PetscScalar *work,lwork; 862 PetscScalar *St,*S,*eigv; 863 PetscScalar *Sarray,*Starray; 864 PetscReal *eigs,thresh; 865 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 866 PetscBool allocated_S_St; 867 #if defined(PETSC_USE_COMPLEX) 868 PetscReal *rwork; 869 #endif 870 PetscErrorCode ierr; 871 872 PetscFunctionBegin; 873 if (!sub_schurs->schur_explicit) { 874 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 875 } 876 877 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 878 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 879 } 880 881 if (pcbddc->dbg_flag) { 882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 884 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 885 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 886 } 887 888 if (pcbddc->dbg_flag) { 889 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 /* max size of subsets */ 893 mss = 0; 894 for (i=0;i<sub_schurs->n_subs;i++) { 895 PetscInt subset_size; 896 897 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 898 mss = PetscMax(mss,subset_size); 899 } 900 901 /* min/max and threshold */ 902 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 903 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 904 nmax = PetscMax(nmin,nmax); 905 allocated_S_St = PETSC_FALSE; 906 if (nmin) { 907 allocated_S_St = PETSC_TRUE; 908 } 909 910 /* allocate lapack workspace */ 911 cum = cum2 = 0; 912 maxneigs = 0; 913 for (i=0;i<sub_schurs->n_subs;i++) { 914 PetscInt n,subset_size; 915 916 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 917 n = PetscMin(subset_size,nmax); 918 cum += subset_size; 919 cum2 += subset_size*n; 920 maxneigs = PetscMax(maxneigs,n); 921 } 922 if (mss) { 923 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 924 PetscBLASInt B_itype = 1; 925 PetscBLASInt B_N = mss; 926 PetscReal zero = 0.0; 927 PetscReal eps = 0.0; /* dlamch? */ 928 929 B_lwork = -1; 930 S = NULL; 931 St = NULL; 932 eigs = NULL; 933 eigv = NULL; 934 B_iwork = NULL; 935 B_ifail = NULL; 936 #if defined(PETSC_USE_COMPLEX) 937 rwork = NULL; 938 #endif 939 thresh = 1.0; 940 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 941 #if defined(PETSC_USE_COMPLEX) 942 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 943 #else 944 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 945 #endif 946 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 947 ierr = PetscFPTrapPop();CHKERRQ(ierr); 948 } else { 949 /* TODO */ 950 } 951 } else { 952 lwork = 0; 953 } 954 955 nv = 0; 956 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 957 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 958 } 959 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 960 if (allocated_S_St) { 961 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 962 } 963 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 964 #if defined(PETSC_USE_COMPLEX) 965 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 966 #endif 967 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 968 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 969 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 970 nv+cum,&pcbddc->adaptive_constraints_idxs, 971 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 972 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 973 974 maxneigs = 0; 975 cum = cum2 = cumarray = 0; 976 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 977 pcbddc->adaptive_constraints_data_ptr[0] = 0; 978 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 979 const PetscInt *idxs; 980 981 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 982 for (cum=0;cum<nv;cum++) { 983 pcbddc->adaptive_constraints_n[cum] = 1; 984 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 985 pcbddc->adaptive_constraints_data[cum] = 1.0; 986 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 987 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 988 } 989 cum2 = cum; 990 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 991 } 992 993 if (mss) { /* multilevel */ 994 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 995 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 996 } 997 998 thresh = pcbddc->adaptive_threshold; 999 for (i=0;i<sub_schurs->n_subs;i++) { 1000 const PetscInt *idxs; 1001 PetscReal infty = PETSC_MAX_REAL; 1002 PetscInt j,subset_size,eigs_start = 0; 1003 PetscBLASInt B_N; 1004 PetscBool same_data = PETSC_FALSE; 1005 1006 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1007 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1008 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1009 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1010 if (sub_schurs->is_hermitian) { 1011 PetscInt j,k; 1012 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1013 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1014 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 for (j=0;j<subset_size;j++) { 1017 for (k=j;k<subset_size;k++) { 1018 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1019 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1020 } 1021 } 1022 } else { 1023 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1024 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 } 1026 } else { 1027 S = Sarray + cumarray; 1028 St = Starray + cumarray; 1029 } 1030 1031 /* see if we can save some work */ 1032 if (sub_schurs->n_subs == 1) { 1033 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1034 } 1035 1036 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1037 B_neigs = 0; 1038 } else { 1039 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1040 PetscBLASInt B_itype = 1; 1041 PetscBLASInt B_IL, B_IU; 1042 PetscReal eps = -1.0; /* dlamch? */ 1043 PetscInt nmin_s; 1044 1045 if (pcbddc->dbg_flag) { 1046 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1047 } 1048 1049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1050 if (thresh > 1.+PETSC_SMALL) { 1051 1052 /* ask for eigenvalues larger than thresh */ 1053 #if defined(PETSC_USE_COMPLEX) 1054 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1055 #else 1056 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1057 #endif 1058 } else { 1059 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1060 B_IL = 1; 1061 #if defined(PETSC_USE_COMPLEX) 1062 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1063 #else 1064 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1065 #endif 1066 } 1067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1068 if (B_ierr) { 1069 if (B_ierr < 0 ) { 1070 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1071 } else if (B_ierr <= B_N) { 1072 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1073 } else { 1074 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1075 } 1076 } 1077 1078 if (B_neigs > nmax) { 1079 if (pcbddc->dbg_flag) { 1080 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1081 } 1082 eigs_start = B_neigs -nmax; 1083 B_neigs = nmax; 1084 } 1085 1086 nmin_s = PetscMin(nmin,B_N); 1087 if (B_neigs < nmin_s) { 1088 PetscBLASInt B_neigs2; 1089 1090 B_IU = B_N - B_neigs; 1091 B_IL = B_N - nmin_s + 1; 1092 if (pcbddc->dbg_flag) { 1093 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1094 } 1095 if (sub_schurs->is_hermitian) { 1096 PetscInt j; 1097 for (j=0;j<subset_size;j++) { 1098 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1099 } 1100 for (j=0;j<subset_size;j++) { 1101 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1102 } 1103 } else { 1104 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1105 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1106 } 1107 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1108 #if defined(PETSC_USE_COMPLEX) 1109 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1110 #else 1111 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1112 #endif 1113 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1114 B_neigs += B_neigs2; 1115 } 1116 if (B_ierr) { 1117 if (B_ierr < 0 ) { 1118 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1119 } else if (B_ierr <= B_N) { 1120 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1121 } else { 1122 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1123 } 1124 } 1125 if (pcbddc->dbg_flag) { 1126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1127 for (j=0;j<B_neigs;j++) { 1128 if (eigs[j] == 0.0) { 1129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1130 } else { 1131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1132 } 1133 } 1134 } 1135 } else { 1136 /* TODO */ 1137 } 1138 } 1139 maxneigs = PetscMax(B_neigs,maxneigs); 1140 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1141 if (B_neigs) { 1142 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1143 1144 if (pcbddc->dbg_flag > 1) { 1145 PetscInt ii; 1146 for (ii=0;ii<B_neigs;ii++) { 1147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1148 for (j=0;j<B_N;j++) { 1149 #if defined(PETSC_USE_COMPLEX) 1150 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1151 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1153 #else 1154 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1155 #endif 1156 } 1157 } 1158 } 1159 #if 0 1160 for (j=0;j<B_neigs;j++) { 1161 PetscBLASInt Blas_N,Blas_one = 1.0; 1162 PetscScalar norm; 1163 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1164 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1165 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1166 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1167 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1168 } else { 1169 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1170 } 1171 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1172 } 1173 #endif 1174 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1175 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1176 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1177 cum++; 1178 } 1179 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1180 /* shift for next computation */ 1181 cumarray += subset_size*subset_size; 1182 } 1183 if (pcbddc->dbg_flag) { 1184 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1185 } 1186 1187 if (mss) { 1188 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1189 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1190 /* destroy matrices (junk) */ 1191 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1192 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1193 } 1194 if (allocated_S_St) { 1195 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1196 } 1197 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1198 #if defined(PETSC_USE_COMPLEX) 1199 ierr = PetscFree(rwork);CHKERRQ(ierr); 1200 #endif 1201 if (pcbddc->dbg_flag) { 1202 PetscInt maxneigs_r; 1203 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "PCBDDCSetUpSolvers" 1211 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1212 { 1213 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1214 PetscScalar *coarse_submat_vals; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1219 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1220 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1221 1222 /* Setup local neumann solver ksp_R */ 1223 /* PCBDDCSetUpLocalScatters should be called first! */ 1224 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1225 1226 /* Change global null space passed in by the user if change of basis has been requested */ 1227 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1228 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1229 } 1230 1231 /* 1232 Setup local correction and local part of coarse basis. 1233 Gives back the dense local part of the coarse matrix in column major ordering 1234 */ 1235 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1236 1237 /* Compute total number of coarse nodes and setup coarse solver */ 1238 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1239 1240 /* free */ 1241 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "PCBDDCResetCustomization" 1247 PetscErrorCode PCBDDCResetCustomization(PC pc) 1248 { 1249 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1254 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1255 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1256 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1257 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1258 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1259 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1260 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1261 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1262 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1263 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1264 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "PCBDDCResetTopography" 1270 PetscErrorCode PCBDDCResetTopography(PC pc) 1271 { 1272 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1273 PetscInt i; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1278 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1279 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1280 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1281 for (i=0;i<pcbddc->n_local_subs;i++) { 1282 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1283 } 1284 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1285 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCResetSolvers" 1291 PetscErrorCode PCBDDCResetSolvers(PC pc) 1292 { 1293 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1298 if (pcbddc->coarse_phi_B) { 1299 PetscScalar *array; 1300 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1301 ierr = PetscFree(array);CHKERRQ(ierr); 1302 } 1303 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1304 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1305 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1306 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1307 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1308 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1309 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1310 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1311 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1312 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1313 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1314 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1315 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1316 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1317 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1318 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1319 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1321 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1322 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1323 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1325 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1326 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1327 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1328 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1329 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1330 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1331 if (pcbddc->benign_zerodiag_subs) { 1332 PetscInt i; 1333 for (i=0;i<pcbddc->benign_n;i++) { 1334 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1337 } 1338 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1344 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1345 { 1346 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1347 PC_IS *pcis = (PC_IS*)pc->data; 1348 VecType impVecType; 1349 PetscInt n_constraints,n_R,old_size; 1350 PetscErrorCode ierr; 1351 1352 PetscFunctionBegin; 1353 if (!pcbddc->ConstraintMatrix) { 1354 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1355 } 1356 /* get sizes */ 1357 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1358 n_R = pcis->n - pcbddc->n_vertices; 1359 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1360 /* local work vectors (try to avoid unneeded work)*/ 1361 /* R nodes */ 1362 old_size = -1; 1363 if (pcbddc->vec1_R) { 1364 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1365 } 1366 if (n_R != old_size) { 1367 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1368 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1369 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1370 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1371 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1372 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1373 } 1374 /* local primal dofs */ 1375 old_size = -1; 1376 if (pcbddc->vec1_P) { 1377 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1378 } 1379 if (pcbddc->local_primal_size != old_size) { 1380 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1381 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1382 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1383 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1384 } 1385 /* local explicit constraints */ 1386 old_size = -1; 1387 if (pcbddc->vec1_C) { 1388 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1389 } 1390 if (n_constraints && n_constraints != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "PCBDDCSetUpCorrection" 1401 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1402 { 1403 PetscErrorCode ierr; 1404 /* pointers to pcis and pcbddc */ 1405 PC_IS* pcis = (PC_IS*)pc->data; 1406 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1407 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1408 /* submatrices of local problem */ 1409 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1410 /* submatrices of benign trick */ 1411 Mat B0_V = NULL; 1412 /* submatrices of local coarse problem */ 1413 Mat S_VV,S_CV,S_VC,S_CC; 1414 /* working matrices */ 1415 Mat C_CR; 1416 /* additional working stuff */ 1417 PC pc_R; 1418 Mat F; 1419 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1420 1421 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1422 PetscScalar *work; 1423 PetscInt *idx_V_B; 1424 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1425 PetscInt i,n_R,n_D,n_B; 1426 1427 /* some shortcuts to scalars */ 1428 PetscScalar one=1.0,m_one=-1.0; 1429 1430 PetscFunctionBegin; 1431 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1432 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1433 } 1434 1435 /* Set Non-overlapping dimensions */ 1436 n_vertices = pcbddc->n_vertices; 1437 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1438 n_B = pcis->n_B; 1439 n_D = pcis->n - n_B; 1440 n_R = pcis->n - n_vertices; 1441 1442 /* vertices in boundary numbering */ 1443 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1444 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1445 if (i != n_vertices) { 1446 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1447 } 1448 1449 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1450 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1452 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1454 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1455 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1456 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1458 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1459 1460 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1461 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1464 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1465 lda_rhs = n_R; 1466 need_benign_correction = PETSC_FALSE; 1467 if (isLU || isILU || isCHOL) { 1468 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1469 } else if (sub_schurs->reuse_solver) { 1470 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1471 MatFactorType type; 1472 1473 F = reuse_solver->F; 1474 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1475 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1476 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1477 need_benign_correction = !!reuse_solver->benign_n; 1478 } else { 1479 F = NULL; 1480 } 1481 1482 /* allocate workspace */ 1483 n = 0; 1484 if (n_constraints) { 1485 n += lda_rhs*n_constraints; 1486 } 1487 if (n_vertices) { 1488 n = PetscMax(2*lda_rhs*n_vertices,n); 1489 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1490 } 1491 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1492 1493 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1494 if (n_constraints) { 1495 Mat M1,M2,M3; 1496 Mat auxmat; 1497 IS is_aux; 1498 PetscScalar *array,*array2; 1499 1500 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1501 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1502 1503 /* Extract constraints on R nodes: C_{CR} */ 1504 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1505 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1506 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1507 1508 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1509 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1510 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1511 for (i=0;i<n_constraints;i++) { 1512 const PetscScalar *row_cmat_values; 1513 const PetscInt *row_cmat_indices; 1514 PetscInt size_of_constraint,j; 1515 1516 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1517 for (j=0;j<size_of_constraint;j++) { 1518 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1519 } 1520 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1521 } 1522 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1523 if (F) { 1524 Mat B; 1525 1526 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1527 if (need_benign_correction) { 1528 PetscScalar *marr; 1529 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1530 1531 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1532 for (i=0;i<n_constraints;i++) { 1533 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1534 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1535 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1536 } 1537 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1538 } 1539 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1540 if (need_benign_correction) { 1541 PetscScalar *marr; 1542 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1543 1544 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1545 for (i=0;i<n_constraints;i++) { 1546 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1547 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1548 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1549 } 1550 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1551 } 1552 ierr = MatDestroy(&B);CHKERRQ(ierr); 1553 } else { 1554 PetscScalar *marr; 1555 1556 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1557 for (i=0;i<n_constraints;i++) { 1558 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1559 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1560 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1562 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1563 } 1564 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1565 } 1566 if (!pcbddc->switch_static) { 1567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1568 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1569 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1570 for (i=0;i<n_constraints;i++) { 1571 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1572 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1573 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1575 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1576 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1577 } 1578 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1579 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1581 } else { 1582 if (lda_rhs != n_R) { 1583 IS dummy; 1584 1585 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1586 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1587 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1590 pcbddc->local_auxmat2 = local_auxmat2_R; 1591 } 1592 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1593 } 1594 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1595 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1596 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1597 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1598 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1599 if (isCHOL) { 1600 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1601 } else { 1602 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1603 } 1604 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1605 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1606 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1607 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1608 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1609 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1610 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1611 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1612 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1613 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1614 } 1615 /* Get submatrices from subdomain matrix */ 1616 if (pcbddc->benign_n) { 1617 IS dummy; 1618 Mat B0_R; 1619 PetscReal norm; 1620 1621 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1622 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1623 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1624 if (norm > PETSC_SMALL) { 1625 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1626 } 1627 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1628 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1629 } 1630 1631 if (n_vertices) { 1632 IS is_aux; 1633 1634 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1635 IS tis; 1636 1637 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1638 ierr = ISSort(tis);CHKERRQ(ierr); 1639 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1640 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1641 } else { 1642 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1643 } 1644 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1645 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1646 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1647 if (pcbddc->benign_n) { 1648 IS dummy; 1649 1650 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1651 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1652 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1653 } 1654 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1655 } 1656 1657 /* Matrix of coarse basis functions (local) */ 1658 if (pcbddc->coarse_phi_B) { 1659 PetscInt on_B,on_primal,on_D=n_D; 1660 if (pcbddc->coarse_phi_D) { 1661 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1662 } 1663 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1664 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1665 PetscScalar *marray; 1666 1667 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1668 ierr = PetscFree(marray);CHKERRQ(ierr); 1669 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1670 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1671 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1672 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1673 } 1674 } 1675 1676 if (!pcbddc->coarse_phi_B) { 1677 PetscScalar *marray; 1678 1679 n = n_B*pcbddc->local_primal_size; 1680 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1681 n += n_D*pcbddc->local_primal_size; 1682 } 1683 if (!pcbddc->symmetric_primal) { 1684 n *= 2; 1685 } 1686 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1687 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1688 n = n_B*pcbddc->local_primal_size; 1689 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1690 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1691 n += n_D*pcbddc->local_primal_size; 1692 } 1693 if (!pcbddc->symmetric_primal) { 1694 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1695 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1696 n = n_B*pcbddc->local_primal_size; 1697 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1698 } 1699 } else { 1700 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1701 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1702 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1703 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1704 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1705 } 1706 } 1707 } 1708 1709 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1710 p0_lidx_I = NULL; 1711 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1712 const PetscInt *idxs; 1713 1714 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1715 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1716 for (i=0;i<pcbddc->benign_n;i++) { 1717 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1718 } 1719 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1720 } 1721 1722 /* vertices */ 1723 if (n_vertices) { 1724 1725 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1726 1727 if (n_R) { 1728 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1729 PetscBLASInt B_N,B_one = 1; 1730 PetscScalar *x,*y; 1731 PetscBool isseqaij; 1732 1733 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1734 if (need_benign_correction) { 1735 ISLocalToGlobalMapping RtoN; 1736 IS is_p0; 1737 PetscInt *idxs_p0,n; 1738 1739 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1740 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1741 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1742 if (n != pcbddc->benign_n) { 1743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1744 } 1745 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1746 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1747 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1748 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1749 } 1750 1751 if (lda_rhs == n_R) { 1752 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1753 } else { 1754 PetscScalar *av,*array; 1755 const PetscInt *xadj,*adjncy; 1756 PetscInt n; 1757 PetscBool flg_row; 1758 1759 array = work+lda_rhs*n_vertices; 1760 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1761 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1762 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1763 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1764 for (i=0;i<n;i++) { 1765 PetscInt j; 1766 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1767 } 1768 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1769 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1771 } 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1773 if (F) { 1774 if (need_benign_correction) { 1775 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1776 PetscScalar *marr; 1777 1778 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1779 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1780 1781 | 0 0 0 | (V) 1782 L = | 0 0 -1 | (P-p0) 1783 | 0 0 -1 | (p0) 1784 1785 */ 1786 for (i=0;i<reuse_solver->benign_n;i++) { 1787 const PetscScalar *vals; 1788 const PetscInt *idxs,*idxs_zero; 1789 PetscInt n,j,nz; 1790 1791 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1792 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1793 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1794 for (j=0;j<n;j++) { 1795 PetscScalar val = vals[j]; 1796 PetscInt k,col = idxs[j]; 1797 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1798 } 1799 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1800 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1801 } 1802 /* need to correct the rhs */ 1803 for (i=0;i<n_vertices;i++) { 1804 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1805 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1806 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1807 } 1808 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1809 } 1810 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1811 /* need to correct the solution */ 1812 if (need_benign_correction) { 1813 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1814 PetscScalar *marr; 1815 1816 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1817 for (i=0;i<n_vertices;i++) { 1818 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1819 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1821 } 1822 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1823 } 1824 } else { 1825 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1826 for (i=0;i<n_vertices;i++) { 1827 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1828 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1829 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1834 } 1835 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1836 /* S_VV and S_CV */ 1837 if (n_constraints) { 1838 Mat B; 1839 1840 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1841 for (i=0;i<n_vertices;i++) { 1842 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1843 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1844 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1848 } 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1850 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1851 ierr = MatDestroy(&B);CHKERRQ(ierr); 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1853 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1854 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1856 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1857 ierr = MatDestroy(&B);CHKERRQ(ierr); 1858 } 1859 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1860 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1861 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1862 } 1863 if (lda_rhs != n_R) { 1864 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1866 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1867 } 1868 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1869 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1870 if (need_benign_correction) { 1871 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1872 PetscScalar *marr,*sums; 1873 1874 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1875 ierr = MatDenseGetArray(S_VVt,&marr); 1876 for (i=0;i<reuse_solver->benign_n;i++) { 1877 const PetscScalar *vals; 1878 const PetscInt *idxs,*idxs_zero; 1879 PetscInt n,j,nz; 1880 1881 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1882 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1883 for (j=0;j<n_vertices;j++) { 1884 PetscInt k; 1885 sums[j] = 0.; 1886 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1887 } 1888 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1889 for (j=0;j<n;j++) { 1890 PetscScalar val = vals[j]; 1891 PetscInt k; 1892 for (k=0;k<n_vertices;k++) { 1893 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1894 } 1895 } 1896 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1897 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1898 } 1899 ierr = PetscFree(sums);CHKERRQ(ierr); 1900 ierr = MatDenseRestoreArray(S_VVt,&marr); 1901 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1902 } 1903 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1904 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1905 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1906 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1907 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1908 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1909 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1910 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1911 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1912 } else { 1913 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1914 } 1915 for (i=0;i<pcbddc->benign_n;i++) { 1916 const PetscScalar *vals; 1917 const PetscInt *idxs; 1918 PetscInt n,j,primal_idx; 1919 1920 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1921 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1922 for (j=0;j<n;j++) { 1923 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1924 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1925 } 1926 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1927 } 1928 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1929 1930 /* coarse basis functions */ 1931 for (i=0;i<n_vertices;i++) { 1932 PetscScalar *y; 1933 1934 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1935 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1936 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1937 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1938 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1939 y[n_B*i+idx_V_B[i]] = 1.0; 1940 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1941 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1942 1943 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1944 PetscInt j; 1945 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1951 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1952 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1953 } 1954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1955 } 1956 /* if n_R == 0 the object is not destroyed */ 1957 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1958 } 1959 1960 if (n_constraints) { 1961 Mat B; 1962 1963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1964 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1965 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1966 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1967 if (n_vertices) { 1968 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1969 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1970 } else { 1971 Mat S_VCt; 1972 1973 if (lda_rhs != n_R) { 1974 ierr = MatDestroy(&B);CHKERRQ(ierr); 1975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1976 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1977 } 1978 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1979 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1980 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1981 } 1982 } 1983 ierr = MatDestroy(&B);CHKERRQ(ierr); 1984 /* coarse basis functions */ 1985 for (i=0;i<n_constraints;i++) { 1986 PetscScalar *y; 1987 1988 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1989 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1990 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1994 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1995 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1996 PetscInt j; 1997 1998 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2003 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2005 } 2006 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2007 } 2008 } 2009 if (n_constraints) { 2010 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2011 } 2012 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2013 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2014 2015 /* compute other basis functions for non-symmetric problems */ 2016 if (!pcbddc->symmetric_primal) { 2017 Mat B_V=NULL,B_C=NULL; 2018 PetscScalar *marray; 2019 2020 if (n_constraints) { 2021 Mat S_CCT,C_CRT; 2022 2023 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2024 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2025 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2026 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2027 if (n_vertices) { 2028 Mat S_VCT; 2029 2030 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2031 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2032 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2033 } 2034 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2035 } 2036 if (n_vertices && n_R) { 2037 PetscScalar *av,*marray; 2038 const PetscInt *xadj,*adjncy; 2039 PetscInt n; 2040 PetscBool flg_row; 2041 2042 /* B_V = B_V - A_VR^T */ 2043 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2044 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2045 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2046 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2047 for (i=0;i<n;i++) { 2048 PetscInt j; 2049 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2050 } 2051 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2052 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2053 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2054 } 2055 2056 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2057 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2058 for (i=0;i<n_vertices;i++) { 2059 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2060 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2061 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2062 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2063 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2064 } 2065 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2066 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2067 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2068 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2069 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2070 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2071 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2072 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2073 } 2074 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2075 /* coarse basis functions */ 2076 for (i=0;i<pcbddc->local_primal_size;i++) { 2077 PetscScalar *y; 2078 2079 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 if (i<n_vertices) { 2085 y[n_B*i+idx_V_B[i]] = 1.0; 2086 } 2087 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2088 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2089 2090 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2091 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2092 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2096 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2097 } 2098 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2099 } 2100 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2101 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2102 } 2103 /* free memory */ 2104 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2105 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2106 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2107 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2108 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2109 ierr = PetscFree(work);CHKERRQ(ierr); 2110 if (n_vertices) { 2111 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2112 } 2113 if (n_constraints) { 2114 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2115 } 2116 /* Checking coarse_sub_mat and coarse basis functios */ 2117 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2118 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2119 if (pcbddc->dbg_flag) { 2120 Mat coarse_sub_mat; 2121 Mat AUXMAT,TM1,TM2,TM3,TM4; 2122 Mat coarse_phi_D,coarse_phi_B; 2123 Mat coarse_psi_D,coarse_psi_B; 2124 Mat A_II,A_BB,A_IB,A_BI; 2125 Mat C_B,CPHI; 2126 IS is_dummy; 2127 Vec mones; 2128 MatType checkmattype=MATSEQAIJ; 2129 PetscReal real_value; 2130 2131 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2132 Mat A; 2133 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2134 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2135 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2136 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2137 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2138 ierr = MatDestroy(&A);CHKERRQ(ierr); 2139 } else { 2140 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2141 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2142 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2143 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2144 } 2145 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2146 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2147 if (!pcbddc->symmetric_primal) { 2148 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2149 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2150 } 2151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2152 2153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2156 if (!pcbddc->symmetric_primal) { 2157 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2158 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2159 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2160 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2161 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2162 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2163 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2164 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2165 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2166 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2167 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2169 } else { 2170 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2171 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2172 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2173 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2174 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2176 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2177 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2178 } 2179 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2180 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2181 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2182 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2183 if (pcbddc->benign_n) { 2184 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2185 PetscScalar *data,*data2; 2186 PetscInt j; 2187 2188 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2189 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2190 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2191 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2192 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2193 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2195 for (j=0;j<pcbddc->benign_n;j++) { 2196 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2197 for (i=0;i<pcbddc->local_primal_size;i++) { 2198 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2199 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2200 } 2201 } 2202 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2203 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2204 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2205 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2206 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2207 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2208 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2209 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2210 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2211 } 2212 #if 0 2213 { 2214 PetscViewer viewer; 2215 char filename[256]; 2216 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2217 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2218 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2220 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2221 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2222 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2223 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2224 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2225 if (pcbddc->coarse_phi_D) { 2226 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2227 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2228 } 2229 if (pcbddc->coarse_psi_B) { 2230 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2231 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2232 } 2233 if (pcbddc->coarse_phi_B) { 2234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2235 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2236 } 2237 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2238 } 2239 #endif 2240 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2241 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2244 2245 /* check constraints */ 2246 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2247 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2249 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2250 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2251 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2252 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2253 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2255 if (!pcbddc->symmetric_primal) { 2256 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2257 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2258 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2259 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2261 } 2262 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2263 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2264 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2265 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2266 } 2267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2268 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2269 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2270 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2271 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2272 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2273 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2274 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2275 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2276 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2277 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2278 if (!pcbddc->symmetric_primal) { 2279 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2280 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2281 } 2282 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2283 } 2284 /* get back data */ 2285 *coarse_submat_vals_n = coarse_submat_vals; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2291 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2292 { 2293 Mat *work_mat; 2294 IS isrow_s,iscol_s; 2295 PetscBool rsorted,csorted; 2296 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2301 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2302 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2303 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2304 2305 if (!rsorted) { 2306 const PetscInt *idxs; 2307 PetscInt *idxs_sorted,i; 2308 2309 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2310 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2311 for (i=0;i<rsize;i++) { 2312 idxs_perm_r[i] = i; 2313 } 2314 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2315 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2316 for (i=0;i<rsize;i++) { 2317 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2318 } 2319 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2320 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2321 } else { 2322 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2323 isrow_s = isrow; 2324 } 2325 2326 if (!csorted) { 2327 if (isrow == iscol) { 2328 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2329 iscol_s = isrow_s; 2330 } else { 2331 const PetscInt *idxs; 2332 PetscInt *idxs_sorted,i; 2333 2334 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2335 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2336 for (i=0;i<csize;i++) { 2337 idxs_perm_c[i] = i; 2338 } 2339 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2340 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2341 for (i=0;i<csize;i++) { 2342 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2343 } 2344 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2345 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2346 } 2347 } else { 2348 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2349 iscol_s = iscol; 2350 } 2351 2352 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2353 2354 if (!rsorted || !csorted) { 2355 Mat new_mat; 2356 IS is_perm_r,is_perm_c; 2357 2358 if (!rsorted) { 2359 PetscInt *idxs_r,i; 2360 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2361 for (i=0;i<rsize;i++) { 2362 idxs_r[idxs_perm_r[i]] = i; 2363 } 2364 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2365 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2366 } else { 2367 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2368 } 2369 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2370 2371 if (!csorted) { 2372 if (isrow_s == iscol_s) { 2373 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2374 is_perm_c = is_perm_r; 2375 } else { 2376 PetscInt *idxs_c,i; 2377 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2378 for (i=0;i<csize;i++) { 2379 idxs_c[idxs_perm_c[i]] = i; 2380 } 2381 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2382 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2383 } 2384 } else { 2385 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2386 } 2387 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2388 2389 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2390 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2391 work_mat[0] = new_mat; 2392 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2393 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2394 } 2395 2396 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2397 *B = work_mat[0]; 2398 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2399 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2400 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2406 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2407 { 2408 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2409 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2410 Mat new_mat; 2411 IS is_local,is_global; 2412 PetscInt local_size; 2413 PetscBool isseqaij; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2418 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2419 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2420 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2421 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2422 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2423 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2424 2425 /* check */ 2426 if (pcbddc->dbg_flag) { 2427 Vec x,x_change; 2428 PetscReal error; 2429 2430 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2431 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2432 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2433 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2436 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2438 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2439 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2442 ierr = VecDestroy(&x);CHKERRQ(ierr); 2443 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2444 } 2445 2446 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2447 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2448 if (isseqaij) { 2449 Mat M; 2450 2451 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2452 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2453 ierr = MatDestroy(&M);CHKERRQ(ierr); 2454 } else { 2455 Mat work_mat,M; 2456 2457 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2458 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2459 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2460 ierr = MatDestroy(&M);CHKERRQ(ierr); 2461 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2462 } 2463 if (matis->A->symmetric_set) { 2464 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2465 #if !defined(PETSC_USE_COMPLEX) 2466 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2467 #endif 2468 } 2469 /* 2470 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2471 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2472 */ 2473 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2479 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2480 { 2481 PC_IS* pcis = (PC_IS*)(pc->data); 2482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2484 PetscInt *idx_R_local=NULL; 2485 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2486 PetscInt vbs,bs; 2487 PetscBT bitmask=NULL; 2488 PetscErrorCode ierr; 2489 2490 PetscFunctionBegin; 2491 /* 2492 No need to setup local scatters if 2493 - primal space is unchanged 2494 AND 2495 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2496 AND 2497 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2498 */ 2499 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2500 PetscFunctionReturn(0); 2501 } 2502 /* destroy old objects */ 2503 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2504 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2505 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2506 /* Set Non-overlapping dimensions */ 2507 n_B = pcis->n_B; 2508 n_D = pcis->n - n_B; 2509 n_vertices = pcbddc->n_vertices; 2510 2511 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2512 2513 /* create auxiliary bitmask and allocate workspace */ 2514 if (!sub_schurs->reuse_solver) { 2515 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2516 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2517 for (i=0;i<n_vertices;i++) { 2518 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2519 } 2520 2521 for (i=0, n_R=0; i<pcis->n; i++) { 2522 if (!PetscBTLookup(bitmask,i)) { 2523 idx_R_local[n_R++] = i; 2524 } 2525 } 2526 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2527 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2528 2529 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2530 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2531 } 2532 2533 /* Block code */ 2534 vbs = 1; 2535 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2536 if (bs>1 && !(n_vertices%bs)) { 2537 PetscBool is_blocked = PETSC_TRUE; 2538 PetscInt *vary; 2539 if (!sub_schurs->reuse_solver) { 2540 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2541 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2542 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2543 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2544 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2545 for (i=0; i<pcis->n/bs; i++) { 2546 if (vary[i]!=0 && vary[i]!=bs) { 2547 is_blocked = PETSC_FALSE; 2548 break; 2549 } 2550 } 2551 ierr = PetscFree(vary);CHKERRQ(ierr); 2552 } else { 2553 /* Verify directly the R set */ 2554 for (i=0; i<n_R/bs; i++) { 2555 PetscInt j,node=idx_R_local[bs*i]; 2556 for (j=1; j<bs; j++) { 2557 if (node != idx_R_local[bs*i+j]-j) { 2558 is_blocked = PETSC_FALSE; 2559 break; 2560 } 2561 } 2562 } 2563 } 2564 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2565 vbs = bs; 2566 for (i=0;i<n_R/vbs;i++) { 2567 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2568 } 2569 } 2570 } 2571 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2572 if (sub_schurs->reuse_solver) { 2573 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2574 2575 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2576 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2577 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2578 reuse_solver->is_R = pcbddc->is_R_local; 2579 } else { 2580 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2581 } 2582 2583 /* print some info if requested */ 2584 if (pcbddc->dbg_flag) { 2585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2587 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2592 } 2593 2594 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2595 if (!sub_schurs->reuse_solver) { 2596 IS is_aux1,is_aux2; 2597 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2598 2599 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2600 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2601 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2602 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2603 for (i=0; i<n_D; i++) { 2604 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2605 } 2606 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2607 for (i=0, j=0; i<n_R; i++) { 2608 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2609 aux_array1[j++] = i; 2610 } 2611 } 2612 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 for (i=0, j=0; i<n_B; i++) { 2615 if (!PetscBTLookup(bitmask,is_indices[i])) { 2616 aux_array2[j++] = i; 2617 } 2618 } 2619 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2620 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2621 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2622 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2623 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2624 2625 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2626 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2627 for (i=0, j=0; i<n_R; i++) { 2628 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2629 aux_array1[j++] = i; 2630 } 2631 } 2632 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2633 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2634 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2635 } 2636 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2637 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2638 } else { 2639 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2640 IS tis; 2641 PetscInt schur_size; 2642 2643 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2644 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2645 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2646 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2647 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2648 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2649 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2650 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2651 } 2652 } 2653 PetscFunctionReturn(0); 2654 } 2655 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2659 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2660 { 2661 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2662 PC_IS *pcis = (PC_IS*)pc->data; 2663 PC pc_temp; 2664 Mat A_RR; 2665 MatReuse reuse; 2666 PetscScalar m_one = -1.0; 2667 PetscReal value; 2668 PetscInt n_D,n_R; 2669 PetscBool use_exact,use_exact_reduced,issbaij; 2670 PetscErrorCode ierr; 2671 /* prefixes stuff */ 2672 char dir_prefix[256],neu_prefix[256],str_level[16]; 2673 size_t len; 2674 2675 PetscFunctionBegin; 2676 2677 /* compute prefixes */ 2678 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2679 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2680 if (!pcbddc->current_level) { 2681 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2682 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2683 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2684 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2685 } else { 2686 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2687 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2688 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2689 len -= 15; /* remove "pc_bddc_coarse_" */ 2690 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2691 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2692 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2693 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2694 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2695 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2696 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2697 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2698 } 2699 2700 /* DIRICHLET PROBLEM */ 2701 if (dirichlet) { 2702 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2703 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2704 Mat A_IIn; 2705 2706 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2707 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2708 pcis->A_II = A_IIn; 2709 } 2710 if (pcbddc->local_mat->symmetric_set) { 2711 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2712 } 2713 /* Matrix for Dirichlet problem is pcis->A_II */ 2714 n_D = pcis->n - pcis->n_B; 2715 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2716 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2717 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2718 /* default */ 2719 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2720 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2721 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2722 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2723 if (issbaij) { 2724 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2725 } else { 2726 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2727 } 2728 /* Allow user's customization */ 2729 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2730 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2731 } 2732 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2733 if (sub_schurs->reuse_solver) { 2734 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2735 2736 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2737 } 2738 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2739 if (!n_D) { 2740 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2741 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2742 } 2743 /* Set Up KSP for Dirichlet problem of BDDC */ 2744 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2745 /* set ksp_D into pcis data */ 2746 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2747 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2748 pcis->ksp_D = pcbddc->ksp_D; 2749 } 2750 2751 /* NEUMANN PROBLEM */ 2752 A_RR = 0; 2753 if (neumann) { 2754 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2755 PetscInt ibs,mbs; 2756 PetscBool issbaij; 2757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2758 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2759 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2760 if (pcbddc->ksp_R) { /* already created ksp */ 2761 PetscInt nn_R; 2762 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2763 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2764 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2765 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2766 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2767 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2768 reuse = MAT_INITIAL_MATRIX; 2769 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2770 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2771 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2772 reuse = MAT_INITIAL_MATRIX; 2773 } else { /* safe to reuse the matrix */ 2774 reuse = MAT_REUSE_MATRIX; 2775 } 2776 } 2777 /* last check */ 2778 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2779 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2780 reuse = MAT_INITIAL_MATRIX; 2781 } 2782 } else { /* first time, so we need to create the matrix */ 2783 reuse = MAT_INITIAL_MATRIX; 2784 } 2785 /* extract A_RR */ 2786 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2787 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2788 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2789 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2790 if (matis->A == pcbddc->local_mat) { 2791 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2792 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2793 } else { 2794 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2795 } 2796 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2797 if (matis->A == pcbddc->local_mat) { 2798 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2799 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2800 } else { 2801 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2802 } 2803 } 2804 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2805 if (pcbddc->local_mat->symmetric_set) { 2806 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2807 } 2808 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2809 Mat A_RRn; 2810 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2811 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2812 A_RR = A_RRn; 2813 } 2814 if (!pcbddc->ksp_R) { /* create object if not present */ 2815 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2816 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2817 /* default */ 2818 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2819 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2820 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2821 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2822 if (issbaij) { 2823 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2824 } else { 2825 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2826 } 2827 /* Allow user's customization */ 2828 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2829 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2830 } 2831 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2832 if (!n_R) { 2833 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2834 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2835 } 2836 /* Reuse solver if it is present */ 2837 if (sub_schurs->reuse_solver) { 2838 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2839 2840 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2841 } 2842 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2843 /* Set Up KSP for Neumann problem of BDDC */ 2844 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2845 } 2846 /* free Neumann problem's matrix */ 2847 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2848 2849 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2850 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2851 if (pcbddc->dbg_flag) { 2852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2853 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2854 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2855 } 2856 if (dirichlet) { /* Dirichlet */ 2857 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2858 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2859 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2860 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2861 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2862 /* need to be adapted? */ 2863 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2864 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2865 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2866 /* print info */ 2867 if (pcbddc->dbg_flag) { 2868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2870 } 2871 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2872 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2873 } 2874 } 2875 if (neumann) { /* Neumann */ 2876 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2877 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2878 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2879 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2880 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2881 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2882 /* need to be adapted? */ 2883 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2884 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2885 /* print info */ 2886 if (pcbddc->dbg_flag) { 2887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2889 } 2890 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2891 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2892 } 2893 } 2894 } 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2900 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2901 { 2902 PetscErrorCode ierr; 2903 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2904 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2905 2906 PetscFunctionBegin; 2907 if (!sub_schurs->reuse_solver) { 2908 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2909 } 2910 if (!pcbddc->switch_static) { 2911 if (applytranspose && pcbddc->local_auxmat1) { 2912 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2913 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2914 } 2915 if (!sub_schurs->reuse_solver) { 2916 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2918 } else { 2919 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2920 2921 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2922 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2923 } 2924 } else { 2925 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2926 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2927 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2928 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2929 if (applytranspose && pcbddc->local_auxmat1) { 2930 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2931 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2932 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 } 2935 } 2936 if (!sub_schurs->reuse_solver) { 2937 if (applytranspose) { 2938 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2939 } else { 2940 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2941 } 2942 } else { 2943 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2944 2945 if (applytranspose) { 2946 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2947 } else { 2948 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2949 } 2950 } 2951 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2952 if (!pcbddc->switch_static) { 2953 if (!sub_schurs->reuse_solver) { 2954 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2955 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2956 } else { 2957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2958 2959 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2960 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2961 } 2962 if (!applytranspose && pcbddc->local_auxmat1) { 2963 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2964 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2965 } 2966 } else { 2967 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2968 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2969 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2970 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 if (!applytranspose && pcbddc->local_auxmat1) { 2972 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2973 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2974 } 2975 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2976 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2977 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2978 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2986 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2987 { 2988 PetscErrorCode ierr; 2989 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2990 PC_IS* pcis = (PC_IS*) (pc->data); 2991 const PetscScalar zero = 0.0; 2992 2993 PetscFunctionBegin; 2994 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2995 if (!pcbddc->benign_apply_coarse_only) { 2996 if (applytranspose) { 2997 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2998 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2999 } else { 3000 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3001 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3002 } 3003 } else { 3004 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3005 } 3006 3007 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3008 if (pcbddc->benign_n) { 3009 PetscScalar *array; 3010 PetscInt j; 3011 3012 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3013 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3014 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3015 } 3016 3017 /* start communications from local primal nodes to rhs of coarse solver */ 3018 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3019 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3020 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3021 3022 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3023 /* TODO remove null space when doing multilevel */ 3024 if (pcbddc->coarse_ksp) { 3025 Mat coarse_mat; 3026 Vec rhs,sol; 3027 MatNullSpace nullsp; 3028 3029 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3030 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3031 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3032 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3033 if (nullsp) { 3034 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3035 } 3036 if (applytranspose) { 3037 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3038 } else { 3039 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3040 } 3041 if (nullsp) { 3042 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3043 } 3044 } 3045 3046 /* Local solution on R nodes */ 3047 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3048 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3049 } 3050 /* communications from coarse sol to local primal nodes */ 3051 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3052 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3053 3054 /* Sum contributions from the two levels */ 3055 if (!pcbddc->benign_apply_coarse_only) { 3056 if (applytranspose) { 3057 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3058 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3059 } else { 3060 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3061 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3062 } 3063 /* store p0 */ 3064 if (pcbddc->benign_n) { 3065 PetscScalar *array; 3066 PetscInt j; 3067 3068 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3069 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3070 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3071 } 3072 } else { /* expand the coarse solution */ 3073 if (applytranspose) { 3074 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3075 } else { 3076 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3077 } 3078 } 3079 PetscFunctionReturn(0); 3080 } 3081 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3084 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3085 { 3086 PetscErrorCode ierr; 3087 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3088 PetscScalar *array; 3089 Vec from,to; 3090 3091 PetscFunctionBegin; 3092 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3093 from = pcbddc->coarse_vec; 3094 to = pcbddc->vec1_P; 3095 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3096 Vec tvec; 3097 3098 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3099 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3100 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3101 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3102 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3103 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3104 } 3105 } else { /* from local to global -> put data in coarse right hand side */ 3106 from = pcbddc->vec1_P; 3107 to = pcbddc->coarse_vec; 3108 } 3109 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 #undef __FUNCT__ 3114 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3115 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3116 { 3117 PetscErrorCode ierr; 3118 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3119 PetscScalar *array; 3120 Vec from,to; 3121 3122 PetscFunctionBegin; 3123 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3124 from = pcbddc->coarse_vec; 3125 to = pcbddc->vec1_P; 3126 } else { /* from local to global -> put data in coarse right hand side */ 3127 from = pcbddc->vec1_P; 3128 to = pcbddc->coarse_vec; 3129 } 3130 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3131 if (smode == SCATTER_FORWARD) { 3132 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3133 Vec tvec; 3134 3135 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3136 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3137 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3138 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3139 } 3140 } else { 3141 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3142 ierr = VecResetArray(from);CHKERRQ(ierr); 3143 } 3144 } 3145 PetscFunctionReturn(0); 3146 } 3147 3148 /* uncomment for testing purposes */ 3149 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3150 #undef __FUNCT__ 3151 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3152 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3153 { 3154 PetscErrorCode ierr; 3155 PC_IS* pcis = (PC_IS*)(pc->data); 3156 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3157 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3158 /* one and zero */ 3159 PetscScalar one=1.0,zero=0.0; 3160 /* space to store constraints and their local indices */ 3161 PetscScalar *constraints_data; 3162 PetscInt *constraints_idxs,*constraints_idxs_B; 3163 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3164 PetscInt *constraints_n; 3165 /* iterators */ 3166 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3167 /* BLAS integers */ 3168 PetscBLASInt lwork,lierr; 3169 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3170 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3171 /* reuse */ 3172 PetscInt olocal_primal_size,olocal_primal_size_cc; 3173 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3174 /* change of basis */ 3175 PetscBool qr_needed; 3176 PetscBT change_basis,qr_needed_idx; 3177 /* auxiliary stuff */ 3178 PetscInt *nnz,*is_indices; 3179 PetscInt ncc; 3180 /* some quantities */ 3181 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3182 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3183 3184 PetscFunctionBegin; 3185 /* Destroy Mat objects computed previously */ 3186 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3187 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3188 /* save info on constraints from previous setup (if any) */ 3189 olocal_primal_size = pcbddc->local_primal_size; 3190 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3191 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3192 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3193 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3194 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3195 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3196 3197 /* print some info */ 3198 if (pcbddc->dbg_flag) { 3199 IS vertices; 3200 PetscInt nv,nedges,nfaces; 3201 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3202 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3203 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3204 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3208 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3210 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3211 } 3212 3213 if (!pcbddc->adaptive_selection) { 3214 IS ISForVertices,*ISForFaces,*ISForEdges; 3215 MatNullSpace nearnullsp; 3216 const Vec *nearnullvecs; 3217 Vec *localnearnullsp; 3218 PetscScalar *array; 3219 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3220 PetscBool nnsp_has_cnst; 3221 /* LAPACK working arrays for SVD or POD */ 3222 PetscBool skip_lapack,boolforchange; 3223 PetscScalar *work; 3224 PetscReal *singular_vals; 3225 #if defined(PETSC_USE_COMPLEX) 3226 PetscReal *rwork; 3227 #endif 3228 #if defined(PETSC_MISSING_LAPACK_GESVD) 3229 PetscScalar *temp_basis,*correlation_mat; 3230 #else 3231 PetscBLASInt dummy_int=1; 3232 PetscScalar dummy_scalar=1.; 3233 #endif 3234 3235 /* Get index sets for faces, edges and vertices from graph */ 3236 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3237 /* free unneeded index sets */ 3238 if (!pcbddc->use_vertices) { 3239 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3240 } 3241 if (!pcbddc->use_edges) { 3242 for (i=0;i<n_ISForEdges;i++) { 3243 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3244 } 3245 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3246 n_ISForEdges = 0; 3247 } 3248 if (!pcbddc->use_faces) { 3249 for (i=0;i<n_ISForFaces;i++) { 3250 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3251 } 3252 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3253 n_ISForFaces = 0; 3254 } 3255 3256 #if defined(PETSC_USE_DEBUG) 3257 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3258 Also use_change_of_basis should be consistent among processors */ 3259 if (pcbddc->NullSpace) { 3260 PetscBool tbool[2],gbool[2]; 3261 3262 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3263 pcbddc->use_change_of_basis = PETSC_TRUE; 3264 if (!ISForEdges) { 3265 pcbddc->use_change_on_faces = PETSC_TRUE; 3266 } 3267 } 3268 tbool[0] = pcbddc->use_change_of_basis; 3269 tbool[1] = pcbddc->use_change_on_faces; 3270 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3271 pcbddc->use_change_of_basis = gbool[0]; 3272 pcbddc->use_change_on_faces = gbool[1]; 3273 } 3274 #endif 3275 3276 /* check if near null space is attached to global mat */ 3277 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3278 if (nearnullsp) { 3279 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3280 /* remove any stored info */ 3281 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3282 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3283 /* store information for BDDC solver reuse */ 3284 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3285 pcbddc->onearnullspace = nearnullsp; 3286 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3287 for (i=0;i<nnsp_size;i++) { 3288 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3289 } 3290 } else { /* if near null space is not provided BDDC uses constants by default */ 3291 nnsp_size = 0; 3292 nnsp_has_cnst = PETSC_TRUE; 3293 } 3294 /* get max number of constraints on a single cc */ 3295 max_constraints = nnsp_size; 3296 if (nnsp_has_cnst) max_constraints++; 3297 3298 /* 3299 Evaluate maximum storage size needed by the procedure 3300 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3301 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3302 There can be multiple constraints per connected component 3303 */ 3304 n_vertices = 0; 3305 if (ISForVertices) { 3306 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3307 } 3308 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3309 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3310 3311 total_counts = n_ISForFaces+n_ISForEdges; 3312 total_counts *= max_constraints; 3313 total_counts += n_vertices; 3314 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3315 3316 total_counts = 0; 3317 max_size_of_constraint = 0; 3318 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3319 IS used_is; 3320 if (i<n_ISForEdges) { 3321 used_is = ISForEdges[i]; 3322 } else { 3323 used_is = ISForFaces[i-n_ISForEdges]; 3324 } 3325 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3326 total_counts += j; 3327 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3328 } 3329 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3330 3331 /* get local part of global near null space vectors */ 3332 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3333 for (k=0;k<nnsp_size;k++) { 3334 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3335 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3336 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3337 } 3338 3339 /* whether or not to skip lapack calls */ 3340 skip_lapack = PETSC_TRUE; 3341 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3342 3343 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3344 if (!skip_lapack) { 3345 PetscScalar temp_work; 3346 3347 #if defined(PETSC_MISSING_LAPACK_GESVD) 3348 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3349 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3350 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3351 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3352 #if defined(PETSC_USE_COMPLEX) 3353 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3354 #endif 3355 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3356 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3357 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3358 lwork = -1; 3359 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3360 #if !defined(PETSC_USE_COMPLEX) 3361 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3362 #else 3363 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3364 #endif 3365 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3366 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3367 #else /* on missing GESVD */ 3368 /* SVD */ 3369 PetscInt max_n,min_n; 3370 max_n = max_size_of_constraint; 3371 min_n = max_constraints; 3372 if (max_size_of_constraint < max_constraints) { 3373 min_n = max_size_of_constraint; 3374 max_n = max_constraints; 3375 } 3376 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3377 #if defined(PETSC_USE_COMPLEX) 3378 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3379 #endif 3380 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3381 lwork = -1; 3382 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3383 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3384 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3385 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3386 #if !defined(PETSC_USE_COMPLEX) 3387 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3388 #else 3389 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3390 #endif 3391 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3392 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3393 #endif /* on missing GESVD */ 3394 /* Allocate optimal workspace */ 3395 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3396 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3397 } 3398 /* Now we can loop on constraining sets */ 3399 total_counts = 0; 3400 constraints_idxs_ptr[0] = 0; 3401 constraints_data_ptr[0] = 0; 3402 /* vertices */ 3403 if (n_vertices) { 3404 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3405 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3406 for (i=0;i<n_vertices;i++) { 3407 constraints_n[total_counts] = 1; 3408 constraints_data[total_counts] = 1.0; 3409 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3410 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3411 total_counts++; 3412 } 3413 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3414 n_vertices = total_counts; 3415 } 3416 3417 /* edges and faces */ 3418 total_counts_cc = total_counts; 3419 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3420 IS used_is; 3421 PetscBool idxs_copied = PETSC_FALSE; 3422 3423 if (ncc<n_ISForEdges) { 3424 used_is = ISForEdges[ncc]; 3425 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3426 } else { 3427 used_is = ISForFaces[ncc-n_ISForEdges]; 3428 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3429 } 3430 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3431 3432 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3433 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3434 /* change of basis should not be performed on local periodic nodes */ 3435 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3436 if (nnsp_has_cnst) { 3437 PetscScalar quad_value; 3438 3439 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3440 idxs_copied = PETSC_TRUE; 3441 3442 if (!pcbddc->use_nnsp_true) { 3443 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3444 } else { 3445 quad_value = 1.0; 3446 } 3447 for (j=0;j<size_of_constraint;j++) { 3448 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3449 } 3450 temp_constraints++; 3451 total_counts++; 3452 } 3453 for (k=0;k<nnsp_size;k++) { 3454 PetscReal real_value; 3455 PetscScalar *ptr_to_data; 3456 3457 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3458 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3459 for (j=0;j<size_of_constraint;j++) { 3460 ptr_to_data[j] = array[is_indices[j]]; 3461 } 3462 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3463 /* check if array is null on the connected component */ 3464 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3465 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3466 if (real_value > 0.0) { /* keep indices and values */ 3467 temp_constraints++; 3468 total_counts++; 3469 if (!idxs_copied) { 3470 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3471 idxs_copied = PETSC_TRUE; 3472 } 3473 } 3474 } 3475 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3476 valid_constraints = temp_constraints; 3477 if (!pcbddc->use_nnsp_true && temp_constraints) { 3478 if (temp_constraints == 1) { /* just normalize the constraint */ 3479 PetscScalar norm,*ptr_to_data; 3480 3481 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3483 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3484 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3485 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3486 } else { /* perform SVD */ 3487 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3488 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3489 3490 #if defined(PETSC_MISSING_LAPACK_GESVD) 3491 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3492 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3493 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3494 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3495 from that computed using LAPACKgesvd 3496 -> This is due to a different computation of eigenvectors in LAPACKheev 3497 -> The quality of the POD-computed basis will be the same */ 3498 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3499 /* Store upper triangular part of correlation matrix */ 3500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3502 for (j=0;j<temp_constraints;j++) { 3503 for (k=0;k<j+1;k++) { 3504 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3505 } 3506 } 3507 /* compute eigenvalues and eigenvectors of correlation matrix */ 3508 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3509 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3510 #if !defined(PETSC_USE_COMPLEX) 3511 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3512 #else 3513 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3514 #endif 3515 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3516 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3517 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3518 j = 0; 3519 while (j < temp_constraints && singular_vals[j] < tol) j++; 3520 total_counts = total_counts-j; 3521 valid_constraints = temp_constraints-j; 3522 /* scale and copy POD basis into used quadrature memory */ 3523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3524 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3525 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3527 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3529 if (j<temp_constraints) { 3530 PetscInt ii; 3531 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3533 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3534 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3535 for (k=0;k<temp_constraints-j;k++) { 3536 for (ii=0;ii<size_of_constraint;ii++) { 3537 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3538 } 3539 } 3540 } 3541 #else /* on missing GESVD */ 3542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3543 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3545 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3546 #if !defined(PETSC_USE_COMPLEX) 3547 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3548 #else 3549 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3550 #endif 3551 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3552 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3553 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3554 k = temp_constraints; 3555 if (k > size_of_constraint) k = size_of_constraint; 3556 j = 0; 3557 while (j < k && singular_vals[k-j-1] < tol) j++; 3558 valid_constraints = k-j; 3559 total_counts = total_counts-temp_constraints+valid_constraints; 3560 #endif /* on missing GESVD */ 3561 } 3562 } 3563 /* update pointers information */ 3564 if (valid_constraints) { 3565 constraints_n[total_counts_cc] = valid_constraints; 3566 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3567 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3568 /* set change_of_basis flag */ 3569 if (boolforchange) { 3570 PetscBTSet(change_basis,total_counts_cc); 3571 } 3572 total_counts_cc++; 3573 } 3574 } 3575 /* free workspace */ 3576 if (!skip_lapack) { 3577 ierr = PetscFree(work);CHKERRQ(ierr); 3578 #if defined(PETSC_USE_COMPLEX) 3579 ierr = PetscFree(rwork);CHKERRQ(ierr); 3580 #endif 3581 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3582 #if defined(PETSC_MISSING_LAPACK_GESVD) 3583 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3584 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3585 #endif 3586 } 3587 for (k=0;k<nnsp_size;k++) { 3588 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3589 } 3590 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3591 /* free index sets of faces, edges and vertices */ 3592 for (i=0;i<n_ISForFaces;i++) { 3593 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3594 } 3595 if (n_ISForFaces) { 3596 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3597 } 3598 for (i=0;i<n_ISForEdges;i++) { 3599 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3600 } 3601 if (n_ISForEdges) { 3602 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3603 } 3604 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3605 } else { 3606 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3607 3608 total_counts = 0; 3609 n_vertices = 0; 3610 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3611 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3612 } 3613 max_constraints = 0; 3614 total_counts_cc = 0; 3615 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3616 total_counts += pcbddc->adaptive_constraints_n[i]; 3617 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3618 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3619 } 3620 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3621 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3622 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3623 constraints_data = pcbddc->adaptive_constraints_data; 3624 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3625 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3626 total_counts_cc = 0; 3627 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3628 if (pcbddc->adaptive_constraints_n[i]) { 3629 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3630 } 3631 } 3632 #if 0 3633 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3634 for (i=0;i<total_counts_cc;i++) { 3635 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3636 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3637 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3638 printf(" %d",constraints_idxs[j]); 3639 } 3640 printf("\n"); 3641 printf("number of cc: %d\n",constraints_n[i]); 3642 } 3643 for (i=0;i<n_vertices;i++) { 3644 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3645 } 3646 for (i=0;i<sub_schurs->n_subs;i++) { 3647 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3648 } 3649 #endif 3650 3651 max_size_of_constraint = 0; 3652 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3653 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3654 /* Change of basis */ 3655 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3656 if (pcbddc->use_change_of_basis) { 3657 for (i=0;i<sub_schurs->n_subs;i++) { 3658 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3659 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3660 } 3661 } 3662 } 3663 } 3664 pcbddc->local_primal_size = total_counts; 3665 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3666 3667 /* map constraints_idxs in boundary numbering */ 3668 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3669 if (i != constraints_idxs_ptr[total_counts_cc]) { 3670 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3671 } 3672 3673 /* Create constraint matrix */ 3674 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3675 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3676 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3677 3678 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3679 /* determine if a QR strategy is needed for change of basis */ 3680 qr_needed = PETSC_FALSE; 3681 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3682 total_primal_vertices=0; 3683 pcbddc->local_primal_size_cc = 0; 3684 for (i=0;i<total_counts_cc;i++) { 3685 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3686 if (size_of_constraint == 1) { 3687 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3688 pcbddc->local_primal_size_cc += 1; 3689 } else if (PetscBTLookup(change_basis,i)) { 3690 for (k=0;k<constraints_n[i];k++) { 3691 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3692 } 3693 pcbddc->local_primal_size_cc += constraints_n[i]; 3694 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3695 PetscBTSet(qr_needed_idx,i); 3696 qr_needed = PETSC_TRUE; 3697 } 3698 } else { 3699 pcbddc->local_primal_size_cc += 1; 3700 } 3701 } 3702 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3703 pcbddc->n_vertices = total_primal_vertices; 3704 /* permute indices in order to have a sorted set of vertices */ 3705 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3706 3707 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3708 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3709 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3710 3711 /* nonzero structure of constraint matrix */ 3712 /* and get reference dof for local constraints */ 3713 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3714 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3715 3716 j = total_primal_vertices; 3717 total_counts = total_primal_vertices; 3718 cum = total_primal_vertices; 3719 for (i=n_vertices;i<total_counts_cc;i++) { 3720 if (!PetscBTLookup(change_basis,i)) { 3721 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3722 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3723 cum++; 3724 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3725 for (k=0;k<constraints_n[i];k++) { 3726 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3727 nnz[j+k] = size_of_constraint; 3728 } 3729 j += constraints_n[i]; 3730 } 3731 } 3732 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3733 ierr = PetscFree(nnz);CHKERRQ(ierr); 3734 3735 /* set values in constraint matrix */ 3736 for (i=0;i<total_primal_vertices;i++) { 3737 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3738 } 3739 total_counts = total_primal_vertices; 3740 for (i=n_vertices;i<total_counts_cc;i++) { 3741 if (!PetscBTLookup(change_basis,i)) { 3742 PetscInt *cols; 3743 3744 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3745 cols = constraints_idxs+constraints_idxs_ptr[i]; 3746 for (k=0;k<constraints_n[i];k++) { 3747 PetscInt row = total_counts+k; 3748 PetscScalar *vals; 3749 3750 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3751 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3752 } 3753 total_counts += constraints_n[i]; 3754 } 3755 } 3756 /* assembling */ 3757 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3759 3760 /* 3761 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3762 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3763 */ 3764 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3765 if (pcbddc->use_change_of_basis) { 3766 /* dual and primal dofs on a single cc */ 3767 PetscInt dual_dofs,primal_dofs; 3768 /* working stuff for GEQRF */ 3769 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3770 PetscBLASInt lqr_work; 3771 /* working stuff for UNGQR */ 3772 PetscScalar *gqr_work,lgqr_work_t; 3773 PetscBLASInt lgqr_work; 3774 /* working stuff for TRTRS */ 3775 PetscScalar *trs_rhs; 3776 PetscBLASInt Blas_NRHS; 3777 /* pointers for values insertion into change of basis matrix */ 3778 PetscInt *start_rows,*start_cols; 3779 PetscScalar *start_vals; 3780 /* working stuff for values insertion */ 3781 PetscBT is_primal; 3782 PetscInt *aux_primal_numbering_B; 3783 /* matrix sizes */ 3784 PetscInt global_size,local_size; 3785 /* temporary change of basis */ 3786 Mat localChangeOfBasisMatrix; 3787 /* extra space for debugging */ 3788 PetscScalar *dbg_work; 3789 3790 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3791 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3792 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3793 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3794 /* nonzeros for local mat */ 3795 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3796 for (i=0;i<pcis->n;i++) nnz[i]=1; 3797 for (i=n_vertices;i<total_counts_cc;i++) { 3798 if (PetscBTLookup(change_basis,i)) { 3799 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3800 if (PetscBTLookup(qr_needed_idx,i)) { 3801 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3802 } else { 3803 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3804 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3805 } 3806 } 3807 } 3808 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3809 ierr = PetscFree(nnz);CHKERRQ(ierr); 3810 /* Set initial identity in the matrix */ 3811 for (i=0;i<pcis->n;i++) { 3812 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3813 } 3814 3815 if (pcbddc->dbg_flag) { 3816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3818 } 3819 3820 3821 /* Now we loop on the constraints which need a change of basis */ 3822 /* 3823 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3824 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3825 3826 Basic blocks of change of basis matrix T computed by 3827 3828 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3829 3830 | 1 0 ... 0 s_1/S | 3831 | 0 1 ... 0 s_2/S | 3832 | ... | 3833 | 0 ... 1 s_{n-1}/S | 3834 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3835 3836 with S = \sum_{i=1}^n s_i^2 3837 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3838 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3839 3840 - QR decomposition of constraints otherwise 3841 */ 3842 if (qr_needed) { 3843 /* space to store Q */ 3844 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3845 /* first we issue queries for optimal work */ 3846 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3847 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3848 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3849 lqr_work = -1; 3850 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3851 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3852 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3853 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3854 lgqr_work = -1; 3855 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3856 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3857 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3858 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3859 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3860 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3861 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3862 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3863 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3864 /* array to store scaling factors for reflectors */ 3865 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3866 /* array to store rhs and solution of triangular solver */ 3867 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3868 /* allocating workspace for check */ 3869 if (pcbddc->dbg_flag) { 3870 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3871 } 3872 } 3873 /* array to store whether a node is primal or not */ 3874 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3875 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3876 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3877 if (i != total_primal_vertices) { 3878 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3879 } 3880 for (i=0;i<total_primal_vertices;i++) { 3881 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3882 } 3883 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3884 3885 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3886 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3887 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3888 if (PetscBTLookup(change_basis,total_counts)) { 3889 /* get constraint info */ 3890 primal_dofs = constraints_n[total_counts]; 3891 dual_dofs = size_of_constraint-primal_dofs; 3892 3893 if (pcbddc->dbg_flag) { 3894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3895 } 3896 3897 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3898 3899 /* copy quadrature constraints for change of basis check */ 3900 if (pcbddc->dbg_flag) { 3901 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3902 } 3903 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3904 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3905 3906 /* compute QR decomposition of constraints */ 3907 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3908 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3910 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3911 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3912 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3913 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3914 3915 /* explictly compute R^-T */ 3916 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3917 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3918 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3919 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3921 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3922 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3923 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3924 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3926 3927 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3928 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3930 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3931 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3932 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3933 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3935 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3936 3937 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3938 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3939 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3941 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3944 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3947 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3948 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3949 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3950 3951 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3952 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3953 /* insert cols for primal dofs */ 3954 for (j=0;j<primal_dofs;j++) { 3955 start_vals = &qr_basis[j*size_of_constraint]; 3956 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3957 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3958 } 3959 /* insert cols for dual dofs */ 3960 for (j=0,k=0;j<dual_dofs;k++) { 3961 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3962 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3963 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3964 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3965 j++; 3966 } 3967 } 3968 3969 /* check change of basis */ 3970 if (pcbddc->dbg_flag) { 3971 PetscInt ii,jj; 3972 PetscBool valid_qr=PETSC_TRUE; 3973 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3980 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3981 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3982 for (jj=0;jj<size_of_constraint;jj++) { 3983 for (ii=0;ii<primal_dofs;ii++) { 3984 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3985 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3986 } 3987 } 3988 if (!valid_qr) { 3989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3990 for (jj=0;jj<size_of_constraint;jj++) { 3991 for (ii=0;ii<primal_dofs;ii++) { 3992 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3993 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3994 } 3995 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3996 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3997 } 3998 } 3999 } 4000 } else { 4001 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4002 } 4003 } 4004 } else { /* simple transformation block */ 4005 PetscInt row,col; 4006 PetscScalar val,norm; 4007 4008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4009 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4010 for (j=0;j<size_of_constraint;j++) { 4011 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4012 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4013 if (!PetscBTLookup(is_primal,row_B)) { 4014 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4015 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4016 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4017 } else { 4018 for (k=0;k<size_of_constraint;k++) { 4019 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4020 if (row != col) { 4021 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4022 } else { 4023 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4024 } 4025 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4026 } 4027 } 4028 } 4029 if (pcbddc->dbg_flag) { 4030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4031 } 4032 } 4033 } else { 4034 if (pcbddc->dbg_flag) { 4035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4036 } 4037 } 4038 } 4039 4040 /* free workspace */ 4041 if (qr_needed) { 4042 if (pcbddc->dbg_flag) { 4043 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4044 } 4045 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4046 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4047 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4048 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4049 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4050 } 4051 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4052 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4053 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4054 4055 /* assembling of global change of variable */ 4056 { 4057 Mat tmat; 4058 PetscInt bs; 4059 4060 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4061 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4062 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4063 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4064 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4065 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4066 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4067 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4068 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4069 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4070 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4071 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4072 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4073 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4074 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4075 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4076 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4077 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4078 } 4079 /* check */ 4080 if (pcbddc->dbg_flag) { 4081 PetscReal error; 4082 Vec x,x_change; 4083 4084 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4085 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4086 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4087 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4088 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4089 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4090 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4091 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4093 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4094 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4095 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4098 ierr = VecDestroy(&x);CHKERRQ(ierr); 4099 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4100 } 4101 4102 /* adapt sub_schurs computed (if any) */ 4103 if (pcbddc->use_deluxe_scaling) { 4104 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4105 if (sub_schurs->S_Ej_all) { 4106 Mat S_new,tmat; 4107 ISLocalToGlobalMapping NtoSall; 4108 IS is_all_N,is_V,is_V_Sall; 4109 const PetscScalar *array; 4110 const PetscInt *idxs_V,*idxs_all; 4111 PetscInt i,n_V; 4112 4113 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4114 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4115 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4116 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4117 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4118 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4119 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4120 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4121 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4122 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4123 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4124 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4125 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4126 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4127 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4128 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4129 for (i=0;i<n_V;i++) { 4130 PetscScalar val; 4131 PetscInt idx; 4132 4133 idx = idxs_V[i]; 4134 val = array[idxs_all[idxs_V[i]]]; 4135 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4136 } 4137 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 sub_schurs->S_Ej_all = S_new; 4140 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4141 if (sub_schurs->sum_S_Ej_all) { 4142 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4143 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4144 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4145 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4146 sub_schurs->sum_S_Ej_all = S_new; 4147 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4148 } 4149 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4150 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4151 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4152 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4153 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4154 } 4155 } 4156 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4157 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4158 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4159 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4160 } 4161 4162 /* set up change of basis context */ 4163 if (pcbddc->ChangeOfBasisMatrix) { 4164 PCBDDCChange_ctx change_ctx; 4165 4166 if (!pcbddc->new_global_mat) { 4167 PetscInt global_size,local_size; 4168 4169 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4170 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4171 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4172 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4173 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4174 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4175 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4176 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4177 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4178 } else { 4179 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4180 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4181 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4182 } 4183 if (!pcbddc->user_ChangeOfBasisMatrix) { 4184 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4185 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4186 } else { 4187 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4188 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4189 } 4190 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4191 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4192 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4193 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4194 } else { 4195 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4196 } 4197 4198 /* add pressure dofs to set of primal nodes for numbering purposes */ 4199 for (i=0;i<pcbddc->benign_n;i++) { 4200 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4201 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4202 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4203 pcbddc->local_primal_size_cc++; 4204 pcbddc->local_primal_size++; 4205 } 4206 4207 /* check if a new primal space has been introduced (also take into account benign trick) */ 4208 pcbddc->new_primal_space_local = PETSC_TRUE; 4209 if (olocal_primal_size == pcbddc->local_primal_size) { 4210 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4211 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4212 if (!pcbddc->new_primal_space_local) { 4213 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4214 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4215 } 4216 } 4217 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4218 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4219 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4220 4221 /* flush dbg viewer */ 4222 if (pcbddc->dbg_flag) { 4223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4224 } 4225 4226 /* free workspace */ 4227 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4228 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4229 if (!pcbddc->adaptive_selection) { 4230 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4231 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4232 } else { 4233 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4234 pcbddc->adaptive_constraints_idxs_ptr, 4235 pcbddc->adaptive_constraints_data_ptr, 4236 pcbddc->adaptive_constraints_idxs, 4237 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4238 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4239 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4240 } 4241 PetscFunctionReturn(0); 4242 } 4243 4244 #undef __FUNCT__ 4245 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4246 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4247 { 4248 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4249 PC_IS *pcis = (PC_IS*)pc->data; 4250 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4251 PetscInt ierr,i,vertex_size,N; 4252 PetscViewer viewer=pcbddc->dbg_viewer; 4253 4254 PetscFunctionBegin; 4255 /* Reset previously computed graph */ 4256 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4257 /* Init local Graph struct */ 4258 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4259 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4260 4261 /* Check validity of the csr graph passed in by the user */ 4262 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4263 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4264 } 4265 4266 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4267 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4268 PetscInt *xadj,*adjncy; 4269 PetscInt nvtxs; 4270 PetscBool flg_row=PETSC_FALSE; 4271 4272 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4273 if (flg_row) { 4274 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4275 pcbddc->computed_rowadj = PETSC_TRUE; 4276 } 4277 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4278 } 4279 if (pcbddc->dbg_flag) { 4280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4281 } 4282 4283 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4284 vertex_size = 1; 4285 if (pcbddc->user_provided_isfordofs) { 4286 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4287 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4288 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4289 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4290 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4291 } 4292 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4293 pcbddc->n_ISForDofs = 0; 4294 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4295 } 4296 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4297 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4298 } else { 4299 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4300 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4301 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4302 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4303 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4304 } 4305 } 4306 } 4307 4308 /* Setup of Graph */ 4309 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4310 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4311 } 4312 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4313 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4314 } 4315 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4316 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4317 } 4318 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4319 4320 /* attach info on disconnected subdomains if present */ 4321 if (pcbddc->n_local_subs) { 4322 PetscInt *local_subs; 4323 4324 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4325 for (i=0;i<pcbddc->n_local_subs;i++) { 4326 const PetscInt *idxs; 4327 PetscInt nl,j; 4328 4329 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4330 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4331 for (j=0;j<nl;j++) { 4332 local_subs[idxs[j]] = i; 4333 } 4334 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4335 } 4336 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4337 pcbddc->mat_graph->local_subs = local_subs; 4338 } 4339 4340 /* Graph's connected components analysis */ 4341 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4342 4343 /* print some info to stdout */ 4344 if (pcbddc->dbg_flag) { 4345 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4346 } 4347 4348 /* mark topography has done */ 4349 pcbddc->recompute_topography = PETSC_FALSE; 4350 PetscFunctionReturn(0); 4351 } 4352 4353 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4354 #undef __FUNCT__ 4355 #define __FUNCT__ "PCBDDCSubsetNumbering" 4356 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4357 { 4358 PetscSF sf; 4359 PetscLayout map; 4360 const PetscInt *idxs; 4361 PetscInt *leaf_data,*root_data,*gidxs; 4362 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4363 PetscInt n_n,nlocals,start,first_index; 4364 PetscMPIInt commsize; 4365 PetscBool first_found; 4366 PetscErrorCode ierr; 4367 4368 PetscFunctionBegin; 4369 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4370 if (subset_mult) { 4371 PetscCheckSameComm(subset,1,subset_mult,2); 4372 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4373 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4374 } 4375 /* create workspace layout for computing global indices of subset */ 4376 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4377 lbounds[0] = lbounds[1] = 0; 4378 for (i=0;i<n;i++) { 4379 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4380 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4381 } 4382 lbounds[0] = -lbounds[0]; 4383 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4384 gbounds[0] = -gbounds[0]; 4385 N = gbounds[1] - gbounds[0] + 1; 4386 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4387 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4388 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4389 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4390 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4391 4392 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4393 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4394 if (subset_mult) { 4395 const PetscInt* idxs_mult; 4396 4397 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4398 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4399 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4400 } else { 4401 for (i=0;i<n;i++) leaf_data[i] = 1; 4402 } 4403 /* local size of new subset */ 4404 n_n = 0; 4405 for (i=0;i<n;i++) n_n += leaf_data[i]; 4406 4407 /* global indexes in layout */ 4408 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4409 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4410 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4411 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4412 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4413 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4414 4415 /* reduce from leaves to roots */ 4416 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4417 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4418 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4419 4420 /* count indexes in local part of layout */ 4421 nlocals = 0; 4422 first_index = -1; 4423 first_found = PETSC_FALSE; 4424 for (i=0;i<Nl;i++) { 4425 if (!first_found && root_data[i]) { 4426 first_found = PETSC_TRUE; 4427 first_index = i; 4428 } 4429 nlocals += root_data[i]; 4430 } 4431 4432 /* cumulative of number of indexes and size of subset without holes */ 4433 #if defined(PETSC_HAVE_MPI_EXSCAN) 4434 start = 0; 4435 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4436 #else 4437 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4438 start = start-nlocals; 4439 #endif 4440 4441 if (N_n) { /* compute total size of new subset if requested */ 4442 *N_n = start + nlocals; 4443 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4444 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4445 } 4446 4447 /* adapt root data with cumulative */ 4448 if (first_found) { 4449 PetscInt old_index; 4450 4451 root_data[first_index] += start; 4452 old_index = first_index; 4453 for (i=first_index+1;i<Nl;i++) { 4454 if (root_data[i]) { 4455 root_data[i] += root_data[old_index]; 4456 old_index = i; 4457 } 4458 } 4459 } 4460 4461 /* from roots to leaves */ 4462 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4463 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4464 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4465 4466 /* create new IS with global indexes without holes */ 4467 if (subset_mult) { 4468 const PetscInt* idxs_mult; 4469 PetscInt cum; 4470 4471 cum = 0; 4472 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4473 for (i=0;i<n;i++) { 4474 PetscInt j; 4475 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4476 } 4477 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4478 } else { 4479 for (i=0;i<n;i++) { 4480 gidxs[i] = leaf_data[i]-1; 4481 } 4482 } 4483 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4484 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4490 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4491 { 4492 PetscInt i,j; 4493 PetscScalar *alphas; 4494 PetscErrorCode ierr; 4495 4496 PetscFunctionBegin; 4497 /* this implements stabilized Gram-Schmidt */ 4498 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4499 for (i=0;i<n;i++) { 4500 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4501 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4502 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4503 } 4504 ierr = PetscFree(alphas);CHKERRQ(ierr); 4505 PetscFunctionReturn(0); 4506 } 4507 4508 #undef __FUNCT__ 4509 #define __FUNCT__ "MatISGetSubassemblingPattern" 4510 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4511 { 4512 IS ranks_send_to; 4513 PetscInt n_neighs,*neighs,*n_shared,**shared; 4514 PetscMPIInt size,rank,color; 4515 PetscInt *xadj,*adjncy; 4516 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4517 PetscInt i,local_size,threshold=0; 4518 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4519 PetscSubcomm subcomm; 4520 PetscErrorCode ierr; 4521 4522 PetscFunctionBegin; 4523 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4524 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4525 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4526 4527 /* Get info on mapping */ 4528 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4529 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4530 4531 /* build local CSR graph of subdomains' connectivity */ 4532 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4533 xadj[0] = 0; 4534 xadj[1] = PetscMax(n_neighs-1,0); 4535 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4536 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4537 4538 if (threshold) { 4539 PetscInt xadj_count = 0; 4540 for (i=1;i<n_neighs;i++) { 4541 if (n_shared[i] > threshold) { 4542 adjncy[xadj_count] = neighs[i]; 4543 adjncy_wgt[xadj_count] = n_shared[i]; 4544 xadj_count++; 4545 } 4546 } 4547 xadj[1] = xadj_count; 4548 } else { 4549 if (xadj[1]) { 4550 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4551 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4552 } 4553 } 4554 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4555 if (use_square) { 4556 for (i=0;i<xadj[1];i++) { 4557 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4558 } 4559 } 4560 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4561 4562 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4563 4564 /* 4565 Restrict work on active processes only. 4566 */ 4567 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4568 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4569 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4570 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4571 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4572 if (color) { 4573 ierr = PetscFree(xadj);CHKERRQ(ierr); 4574 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4575 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4576 } else { 4577 Mat subdomain_adj; 4578 IS new_ranks,new_ranks_contig; 4579 MatPartitioning partitioner; 4580 PetscInt prank,rstart=0,rend=0; 4581 PetscInt *is_indices,*oldranks; 4582 PetscBool aggregate; 4583 4584 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4585 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4586 prank = rank; 4587 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4588 /* 4589 for (i=0;i<size;i++) { 4590 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4591 } 4592 */ 4593 for (i=0;i<xadj[1];i++) { 4594 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4595 } 4596 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4597 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4598 if (aggregate) { 4599 PetscInt lrows,row,ncols,*cols; 4600 PetscMPIInt nrank; 4601 PetscScalar *vals; 4602 4603 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4604 lrows = 0; 4605 if (nrank<redprocs) { 4606 lrows = size/redprocs; 4607 if (nrank<size%redprocs) lrows++; 4608 } 4609 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4610 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4611 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4612 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4613 row = nrank; 4614 ncols = xadj[1]-xadj[0]; 4615 cols = adjncy; 4616 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4617 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4618 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4619 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 ierr = PetscFree(xadj);CHKERRQ(ierr); 4622 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4623 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4624 ierr = PetscFree(vals);CHKERRQ(ierr); 4625 } else { 4626 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4627 } 4628 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4629 4630 /* Partition */ 4631 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4632 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4633 if (use_vwgt) { 4634 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4635 v_wgt[0] = local_size; 4636 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4637 } 4638 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4639 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4640 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4641 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4642 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4643 4644 /* renumber new_ranks to avoid "holes" in new set of processors */ 4645 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4646 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4647 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4648 if (!redprocs) { 4649 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4650 } else { 4651 PetscInt idxs[1]; 4652 PetscMPIInt tag; 4653 MPI_Request *reqs; 4654 4655 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4656 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4657 for (i=rstart;i<rend;i++) { 4658 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4659 } 4660 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4661 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4662 ierr = PetscFree(reqs);CHKERRQ(ierr); 4663 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4664 } 4665 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4666 /* clean up */ 4667 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4668 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4669 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4670 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4671 } 4672 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4673 4674 /* assemble parallel IS for sends */ 4675 i = 1; 4676 if (color) i=0; 4677 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4678 /* get back IS */ 4679 *is_sends = ranks_send_to; 4680 PetscFunctionReturn(0); 4681 } 4682 4683 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4684 4685 #undef __FUNCT__ 4686 #define __FUNCT__ "MatISSubassemble" 4687 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4688 { 4689 Mat local_mat; 4690 IS is_sends_internal; 4691 PetscInt rows,cols,new_local_rows; 4692 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4693 PetscBool ismatis,isdense,newisdense,destroy_mat; 4694 ISLocalToGlobalMapping l2gmap; 4695 PetscInt* l2gmap_indices; 4696 const PetscInt* is_indices; 4697 MatType new_local_type; 4698 /* buffers */ 4699 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4700 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4701 PetscInt *recv_buffer_idxs_local; 4702 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4703 /* MPI */ 4704 MPI_Comm comm,comm_n; 4705 PetscSubcomm subcomm; 4706 PetscMPIInt n_sends,n_recvs,commsize; 4707 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4708 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4709 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4710 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4711 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4712 PetscErrorCode ierr; 4713 4714 PetscFunctionBegin; 4715 /* TODO: add missing checks */ 4716 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4717 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4718 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4719 PetscValidLogicalCollectiveInt(mat,nis,7); 4720 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4721 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4722 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4723 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4724 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4725 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4726 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4727 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4728 PetscInt mrows,mcols,mnrows,mncols; 4729 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4730 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4731 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4732 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4733 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4734 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4735 } 4736 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4737 PetscValidLogicalCollectiveInt(mat,bs,0); 4738 /* prepare IS for sending if not provided */ 4739 if (!is_sends) { 4740 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4741 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4742 } else { 4743 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4744 is_sends_internal = is_sends; 4745 } 4746 4747 /* get comm */ 4748 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4749 4750 /* compute number of sends */ 4751 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4752 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4753 4754 /* compute number of receives */ 4755 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4756 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4757 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4758 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4759 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4760 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4761 ierr = PetscFree(iflags);CHKERRQ(ierr); 4762 4763 /* restrict comm if requested */ 4764 subcomm = 0; 4765 destroy_mat = PETSC_FALSE; 4766 if (restrict_comm) { 4767 PetscMPIInt color,subcommsize; 4768 4769 color = 0; 4770 if (restrict_full) { 4771 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4772 } else { 4773 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4774 } 4775 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4776 subcommsize = commsize - subcommsize; 4777 /* check if reuse has been requested */ 4778 if (reuse == MAT_REUSE_MATRIX) { 4779 if (*mat_n) { 4780 PetscMPIInt subcommsize2; 4781 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4782 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4783 comm_n = PetscObjectComm((PetscObject)*mat_n); 4784 } else { 4785 comm_n = PETSC_COMM_SELF; 4786 } 4787 } else { /* MAT_INITIAL_MATRIX */ 4788 PetscMPIInt rank; 4789 4790 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4791 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4792 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4793 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4794 comm_n = PetscSubcommChild(subcomm); 4795 } 4796 /* flag to destroy *mat_n if not significative */ 4797 if (color) destroy_mat = PETSC_TRUE; 4798 } else { 4799 comm_n = comm; 4800 } 4801 4802 /* prepare send/receive buffers */ 4803 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4804 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4805 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4806 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4807 if (nis) { 4808 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4809 } 4810 4811 /* Get data from local matrices */ 4812 if (!isdense) { 4813 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4814 /* TODO: See below some guidelines on how to prepare the local buffers */ 4815 /* 4816 send_buffer_vals should contain the raw values of the local matrix 4817 send_buffer_idxs should contain: 4818 - MatType_PRIVATE type 4819 - PetscInt size_of_l2gmap 4820 - PetscInt global_row_indices[size_of_l2gmap] 4821 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4822 */ 4823 } else { 4824 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4825 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4826 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4827 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4828 send_buffer_idxs[1] = i; 4829 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4830 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4831 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4832 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4833 for (i=0;i<n_sends;i++) { 4834 ilengths_vals[is_indices[i]] = len*len; 4835 ilengths_idxs[is_indices[i]] = len+2; 4836 } 4837 } 4838 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4839 /* additional is (if any) */ 4840 if (nis) { 4841 PetscMPIInt psum; 4842 PetscInt j; 4843 for (j=0,psum=0;j<nis;j++) { 4844 PetscInt plen; 4845 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4846 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4847 psum += len+1; /* indices + lenght */ 4848 } 4849 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4850 for (j=0,psum=0;j<nis;j++) { 4851 PetscInt plen; 4852 const PetscInt *is_array_idxs; 4853 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4854 send_buffer_idxs_is[psum] = plen; 4855 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4856 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4857 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4858 psum += plen+1; /* indices + lenght */ 4859 } 4860 for (i=0;i<n_sends;i++) { 4861 ilengths_idxs_is[is_indices[i]] = psum; 4862 } 4863 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4864 } 4865 4866 buf_size_idxs = 0; 4867 buf_size_vals = 0; 4868 buf_size_idxs_is = 0; 4869 for (i=0;i<n_recvs;i++) { 4870 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4871 buf_size_vals += (PetscInt)olengths_vals[i]; 4872 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4873 } 4874 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4875 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4876 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4877 4878 /* get new tags for clean communications */ 4879 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4880 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4881 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4882 4883 /* allocate for requests */ 4884 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4885 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4886 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4887 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4888 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4890 4891 /* communications */ 4892 ptr_idxs = recv_buffer_idxs; 4893 ptr_vals = recv_buffer_vals; 4894 ptr_idxs_is = recv_buffer_idxs_is; 4895 for (i=0;i<n_recvs;i++) { 4896 source_dest = onodes[i]; 4897 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4898 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4899 ptr_idxs += olengths_idxs[i]; 4900 ptr_vals += olengths_vals[i]; 4901 if (nis) { 4902 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4903 ptr_idxs_is += olengths_idxs_is[i]; 4904 } 4905 } 4906 for (i=0;i<n_sends;i++) { 4907 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4908 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4909 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4910 if (nis) { 4911 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4912 } 4913 } 4914 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4915 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4916 4917 /* assemble new l2g map */ 4918 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4919 ptr_idxs = recv_buffer_idxs; 4920 new_local_rows = 0; 4921 for (i=0;i<n_recvs;i++) { 4922 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4923 ptr_idxs += olengths_idxs[i]; 4924 } 4925 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4926 ptr_idxs = recv_buffer_idxs; 4927 new_local_rows = 0; 4928 for (i=0;i<n_recvs;i++) { 4929 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4930 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4931 ptr_idxs += olengths_idxs[i]; 4932 } 4933 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4934 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4935 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4936 4937 /* infer new local matrix type from received local matrices type */ 4938 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4939 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4940 if (n_recvs) { 4941 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4942 ptr_idxs = recv_buffer_idxs; 4943 for (i=0;i<n_recvs;i++) { 4944 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4945 new_local_type_private = MATAIJ_PRIVATE; 4946 break; 4947 } 4948 ptr_idxs += olengths_idxs[i]; 4949 } 4950 switch (new_local_type_private) { 4951 case MATDENSE_PRIVATE: 4952 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4953 new_local_type = MATSEQAIJ; 4954 bs = 1; 4955 } else { /* if I receive only 1 dense matrix */ 4956 new_local_type = MATSEQDENSE; 4957 bs = 1; 4958 } 4959 break; 4960 case MATAIJ_PRIVATE: 4961 new_local_type = MATSEQAIJ; 4962 bs = 1; 4963 break; 4964 case MATBAIJ_PRIVATE: 4965 new_local_type = MATSEQBAIJ; 4966 break; 4967 case MATSBAIJ_PRIVATE: 4968 new_local_type = MATSEQSBAIJ; 4969 break; 4970 default: 4971 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4972 break; 4973 } 4974 } else { /* by default, new_local_type is seqdense */ 4975 new_local_type = MATSEQDENSE; 4976 bs = 1; 4977 } 4978 4979 /* create MATIS object if needed */ 4980 if (reuse == MAT_INITIAL_MATRIX) { 4981 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4982 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4983 } else { 4984 /* it also destroys the local matrices */ 4985 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4986 } 4987 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4988 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4989 4990 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4991 4992 /* Global to local map of received indices */ 4993 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4994 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4995 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4996 4997 /* restore attributes -> type of incoming data and its size */ 4998 buf_size_idxs = 0; 4999 for (i=0;i<n_recvs;i++) { 5000 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5001 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5002 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5003 } 5004 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5005 5006 /* set preallocation */ 5007 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5008 if (!newisdense) { 5009 PetscInt *new_local_nnz=0; 5010 5011 ptr_vals = recv_buffer_vals; 5012 ptr_idxs = recv_buffer_idxs_local; 5013 if (n_recvs) { 5014 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5015 } 5016 for (i=0;i<n_recvs;i++) { 5017 PetscInt j; 5018 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5019 for (j=0;j<*(ptr_idxs+1);j++) { 5020 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5021 } 5022 } else { 5023 /* TODO */ 5024 } 5025 ptr_idxs += olengths_idxs[i]; 5026 } 5027 if (new_local_nnz) { 5028 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5029 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5030 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5031 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5032 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5033 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5034 } else { 5035 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5036 } 5037 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5038 } else { 5039 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5040 } 5041 5042 /* set values */ 5043 ptr_vals = recv_buffer_vals; 5044 ptr_idxs = recv_buffer_idxs_local; 5045 for (i=0;i<n_recvs;i++) { 5046 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5047 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5048 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5049 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5050 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5051 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5052 } else { 5053 /* TODO */ 5054 } 5055 ptr_idxs += olengths_idxs[i]; 5056 ptr_vals += olengths_vals[i]; 5057 } 5058 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5059 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5060 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5061 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5062 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5063 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5064 5065 #if 0 5066 if (!restrict_comm) { /* check */ 5067 Vec lvec,rvec; 5068 PetscReal infty_error; 5069 5070 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5071 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5072 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5073 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5074 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5075 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5076 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5077 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5078 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5079 } 5080 #endif 5081 5082 /* assemble new additional is (if any) */ 5083 if (nis) { 5084 PetscInt **temp_idxs,*count_is,j,psum; 5085 5086 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5087 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5088 ptr_idxs = recv_buffer_idxs_is; 5089 psum = 0; 5090 for (i=0;i<n_recvs;i++) { 5091 for (j=0;j<nis;j++) { 5092 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5093 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5094 psum += plen; 5095 ptr_idxs += plen+1; /* shift pointer to received data */ 5096 } 5097 } 5098 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5099 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5100 for (i=1;i<nis;i++) { 5101 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5102 } 5103 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5104 ptr_idxs = recv_buffer_idxs_is; 5105 for (i=0;i<n_recvs;i++) { 5106 for (j=0;j<nis;j++) { 5107 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5108 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5109 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5110 ptr_idxs += plen+1; /* shift pointer to received data */ 5111 } 5112 } 5113 for (i=0;i<nis;i++) { 5114 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5115 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5116 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5117 } 5118 ierr = PetscFree(count_is);CHKERRQ(ierr); 5119 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5120 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5121 } 5122 /* free workspace */ 5123 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5124 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5125 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5126 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5127 if (isdense) { 5128 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5129 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5130 } else { 5131 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5132 } 5133 if (nis) { 5134 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5135 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5136 } 5137 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5138 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5139 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5140 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5141 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5142 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5143 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5144 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5145 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5146 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5147 ierr = PetscFree(onodes);CHKERRQ(ierr); 5148 if (nis) { 5149 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5150 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5151 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5152 } 5153 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5154 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5155 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5156 for (i=0;i<nis;i++) { 5157 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5158 } 5159 *mat_n = NULL; 5160 } 5161 PetscFunctionReturn(0); 5162 } 5163 5164 /* temporary hack into ksp private data structure */ 5165 #include <petsc/private/kspimpl.h> 5166 5167 #undef __FUNCT__ 5168 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5169 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5170 { 5171 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5172 PC_IS *pcis = (PC_IS*)pc->data; 5173 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5174 MatNullSpace CoarseNullSpace=NULL; 5175 ISLocalToGlobalMapping coarse_islg; 5176 IS coarse_is,*isarray; 5177 PetscInt i,im_active=-1,active_procs=-1; 5178 PetscInt nis,nisdofs,nisneu,nisvert; 5179 PC pc_temp; 5180 PCType coarse_pc_type; 5181 KSPType coarse_ksp_type; 5182 PetscBool multilevel_requested,multilevel_allowed; 5183 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5184 Mat t_coarse_mat_is; 5185 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5186 PetscMPIInt all_procs; 5187 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5188 PetscBool compute_vecs = PETSC_FALSE; 5189 PetscScalar *array; 5190 PetscErrorCode ierr; 5191 5192 PetscFunctionBegin; 5193 /* Assign global numbering to coarse dofs */ 5194 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5195 PetscInt ocoarse_size; 5196 compute_vecs = PETSC_TRUE; 5197 ocoarse_size = pcbddc->coarse_size; 5198 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5199 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5200 /* see if we can avoid some work */ 5201 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5202 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5203 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5204 PC pc; 5205 PetscBool isbddc; 5206 5207 /* temporary workaround since PCBDDC does not have a reset method so far */ 5208 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5209 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5210 if (isbddc) { 5211 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5212 } else { 5213 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5214 } 5215 coarse_reuse = PETSC_FALSE; 5216 } else { /* we can safely reuse already computed coarse matrix */ 5217 coarse_reuse = PETSC_TRUE; 5218 } 5219 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5220 coarse_reuse = PETSC_FALSE; 5221 } 5222 /* reset any subassembling information */ 5223 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5224 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5225 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5226 coarse_reuse = PETSC_TRUE; 5227 } 5228 5229 /* count "active" (i.e. with positive local size) and "void" processes */ 5230 im_active = !!(pcis->n); 5231 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5232 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5233 void_procs = all_procs-active_procs; 5234 csin_type_simple = PETSC_TRUE; 5235 redist = PETSC_FALSE; 5236 if (pcbddc->current_level && void_procs) { 5237 csin_ml = PETSC_TRUE; 5238 ncoarse_ml = void_procs; 5239 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5240 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5241 csin_ds = PETSC_TRUE; 5242 ncoarse_ds = pcbddc->redistribute_coarse; 5243 redist = PETSC_TRUE; 5244 } else { 5245 csin_ds = PETSC_TRUE; 5246 ncoarse_ds = active_procs; 5247 redist = PETSC_TRUE; 5248 } 5249 } else { 5250 csin_ml = PETSC_FALSE; 5251 ncoarse_ml = all_procs; 5252 if (void_procs) { 5253 csin_ds = PETSC_TRUE; 5254 ncoarse_ds = void_procs; 5255 csin_type_simple = PETSC_FALSE; 5256 } else { 5257 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5258 csin_ds = PETSC_TRUE; 5259 ncoarse_ds = pcbddc->redistribute_coarse; 5260 redist = PETSC_TRUE; 5261 } else { 5262 csin_ds = PETSC_FALSE; 5263 ncoarse_ds = all_procs; 5264 } 5265 } 5266 } 5267 5268 /* 5269 test if we can go multilevel: three conditions must be satisfied: 5270 - we have not exceeded the number of levels requested 5271 - we can actually subassemble the active processes 5272 - we can find a suitable number of MPI processes where we can place the subassembled problem 5273 */ 5274 multilevel_allowed = PETSC_FALSE; 5275 multilevel_requested = PETSC_FALSE; 5276 if (pcbddc->current_level < pcbddc->max_levels) { 5277 multilevel_requested = PETSC_TRUE; 5278 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5279 multilevel_allowed = PETSC_FALSE; 5280 } else { 5281 multilevel_allowed = PETSC_TRUE; 5282 } 5283 } 5284 /* determine number of process partecipating to coarse solver */ 5285 if (multilevel_allowed) { 5286 ncoarse = ncoarse_ml; 5287 csin = csin_ml; 5288 redist = PETSC_FALSE; 5289 } else { 5290 ncoarse = ncoarse_ds; 5291 csin = csin_ds; 5292 } 5293 5294 /* creates temporary l2gmap and IS for coarse indexes */ 5295 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5296 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5297 5298 /* creates temporary MATIS object for coarse matrix */ 5299 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5300 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5301 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5302 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5303 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5304 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5305 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5306 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5307 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5308 5309 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5310 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5311 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5312 const PetscInt *idxs; 5313 ISLocalToGlobalMapping tmap; 5314 5315 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5316 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5317 /* allocate space for temporary storage */ 5318 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5319 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5320 /* allocate for IS array */ 5321 nisdofs = pcbddc->n_ISForDofsLocal; 5322 nisneu = !!pcbddc->NeumannBoundariesLocal; 5323 nisvert = 0; 5324 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5325 nisvert = 1; 5326 } 5327 nis = nisdofs + nisneu + nisvert; 5328 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5329 /* dofs splitting */ 5330 for (i=0;i<nisdofs;i++) { 5331 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5332 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5333 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5334 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5335 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5336 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5337 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5338 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5339 } 5340 /* neumann boundaries */ 5341 if (pcbddc->NeumannBoundariesLocal) { 5342 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5343 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5344 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5345 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5346 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5347 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5348 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5349 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5350 } 5351 /* primal vertices (benign) */ 5352 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5353 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5354 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5355 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5356 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5357 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5358 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5359 } 5360 /* free memory */ 5361 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5362 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5363 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5364 } else { 5365 nis = 0; 5366 nisdofs = 0; 5367 nisneu = 0; 5368 nisvert = 0; 5369 isarray = NULL; 5370 } 5371 /* destroy no longer needed map */ 5372 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5373 5374 /* restrict on coarse candidates (if needed) */ 5375 coarse_mat_is = NULL; 5376 if (csin) { 5377 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5378 if (redist) { 5379 PetscMPIInt rank; 5380 PetscInt spc,n_spc_p1,dest[1],destsize; 5381 5382 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5383 spc = active_procs/ncoarse; 5384 n_spc_p1 = active_procs%ncoarse; 5385 if (im_active) { 5386 destsize = 1; 5387 if (rank > n_spc_p1*(spc+1)-1) { 5388 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5389 } else { 5390 dest[0] = rank/(spc+1); 5391 } 5392 } else { 5393 destsize = 0; 5394 } 5395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5396 } else if (csin_type_simple) { 5397 PetscMPIInt rank; 5398 PetscInt issize,isidx; 5399 5400 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5401 if (im_active) { 5402 issize = 1; 5403 isidx = (PetscInt)rank; 5404 } else { 5405 issize = 0; 5406 isidx = -1; 5407 } 5408 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5409 } else { /* get a suitable subassembling pattern from MATIS code */ 5410 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5411 } 5412 5413 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5414 if (!redist || ncoarse <= void_procs) { 5415 PetscInt ncoarse_cand,tissize,*nisindices; 5416 PetscInt *coarse_candidates; 5417 const PetscInt* tisindices; 5418 5419 /* get coarse candidates' ranks in pc communicator */ 5420 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5421 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5422 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5423 if (!coarse_candidates[i]) { 5424 coarse_candidates[ncoarse_cand++]=i; 5425 } 5426 } 5427 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5428 5429 5430 if (pcbddc->dbg_flag) { 5431 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5433 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5435 for (i=0;i<ncoarse_cand;i++) { 5436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5437 } 5438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5440 } 5441 /* shift the pattern on coarse candidates */ 5442 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5443 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5444 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5445 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5446 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5447 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5448 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5449 } 5450 if (pcbddc->dbg_flag) { 5451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5453 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5455 } 5456 } 5457 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5458 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5459 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5460 } else { /* this is the last level, so use just receiving processes in subcomm */ 5461 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5462 } 5463 } else { 5464 if (pcbddc->dbg_flag) { 5465 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5466 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5467 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5468 } 5469 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5470 coarse_mat_is = t_coarse_mat_is; 5471 } 5472 5473 /* create local to global scatters for coarse problem */ 5474 if (compute_vecs) { 5475 PetscInt lrows; 5476 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5477 if (coarse_mat_is) { 5478 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5479 } else { 5480 lrows = 0; 5481 } 5482 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5483 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5484 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5485 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5486 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5487 } 5488 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5489 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5490 5491 /* set defaults for coarse KSP and PC */ 5492 if (multilevel_allowed) { 5493 coarse_ksp_type = KSPRICHARDSON; 5494 coarse_pc_type = PCBDDC; 5495 } else { 5496 coarse_ksp_type = KSPPREONLY; 5497 coarse_pc_type = PCREDUNDANT; 5498 } 5499 5500 /* print some info if requested */ 5501 if (pcbddc->dbg_flag) { 5502 if (!multilevel_allowed) { 5503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5504 if (multilevel_requested) { 5505 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5506 } else if (pcbddc->max_levels) { 5507 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5508 } 5509 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5510 } 5511 } 5512 5513 /* create the coarse KSP object only once with defaults */ 5514 if (coarse_mat_is) { 5515 MatReuse coarse_mat_reuse; 5516 PetscViewer dbg_viewer = NULL; 5517 if (pcbddc->dbg_flag) { 5518 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5519 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5520 } 5521 if (!pcbddc->coarse_ksp) { 5522 char prefix[256],str_level[16]; 5523 size_t len; 5524 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5525 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5526 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5527 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5528 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5529 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5530 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5531 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5532 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5533 /* prefix */ 5534 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5535 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5536 if (!pcbddc->current_level) { 5537 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5538 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5539 } else { 5540 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5541 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5542 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5543 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5544 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5545 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5546 } 5547 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5548 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5549 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5550 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5551 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5552 /* allow user customization */ 5553 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5554 } 5555 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5556 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5557 if (nisdofs) { 5558 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5559 for (i=0;i<nisdofs;i++) { 5560 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5561 } 5562 } 5563 if (nisneu) { 5564 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5565 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5566 } 5567 if (nisvert) { 5568 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5569 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5570 } 5571 5572 /* get some info after set from options */ 5573 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5574 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5575 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5576 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5577 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5578 isbddc = PETSC_FALSE; 5579 } 5580 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5581 if (isredundant) { 5582 KSP inner_ksp; 5583 PC inner_pc; 5584 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5585 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5586 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5587 } 5588 5589 /* assemble coarse matrix */ 5590 if (coarse_reuse) { 5591 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5592 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5593 coarse_mat_reuse = MAT_REUSE_MATRIX; 5594 } else { 5595 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5596 } 5597 if (isbddc || isnn) { 5598 if (isbddc) { /* currently there's no API for this */ 5599 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5600 pcbddc->detect_disconnected = PETSC_TRUE; 5601 } 5602 if (pcbddc->coarsening_ratio > 1) { 5603 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5604 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5605 if (pcbddc->dbg_flag) { 5606 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5607 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5608 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5609 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5610 } 5611 } 5612 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5613 } else { 5614 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5615 coarse_mat = coarse_mat_is; 5616 } 5617 } else { 5618 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5619 } 5620 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5621 5622 /* propagate symmetry info of coarse matrix */ 5623 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5624 if (pc->pmat->symmetric_set) { 5625 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5626 } 5627 if (pc->pmat->hermitian_set) { 5628 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5629 } 5630 if (pc->pmat->spd_set) { 5631 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5632 } 5633 /* set operators */ 5634 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5635 if (pcbddc->dbg_flag) { 5636 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5637 } 5638 } else { /* processes non partecipating to coarse solver (if any) */ 5639 coarse_mat = 0; 5640 } 5641 ierr = PetscFree(isarray);CHKERRQ(ierr); 5642 #if 0 5643 { 5644 PetscViewer viewer; 5645 char filename[256]; 5646 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5647 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5648 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5649 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5650 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5651 } 5652 #endif 5653 5654 /* Compute coarse null space (special handling by BDDC only) */ 5655 #if 0 5656 if (pcbddc->NullSpace) { 5657 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5658 } 5659 #endif 5660 /* hack */ 5661 if (pcbddc->coarse_ksp) { 5662 Vec crhs,csol; 5663 5664 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5665 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5666 if (!csol) { 5667 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5668 } 5669 if (!crhs) { 5670 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5671 } 5672 } 5673 5674 /* compute null space for coarse solver if the benign trick has been requested */ 5675 if (pcbddc->benign_null) { 5676 5677 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5678 for (i=0;i<pcbddc->benign_n;i++) { 5679 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5680 } 5681 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5682 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5683 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5684 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5685 if (coarse_mat) { 5686 Vec nullv; 5687 PetscScalar *array,*array2; 5688 PetscInt nl; 5689 5690 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5691 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5692 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5693 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5694 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5695 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5696 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5697 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5699 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5700 } 5701 } 5702 5703 if (pcbddc->coarse_ksp) { 5704 PetscBool ispreonly; 5705 5706 if (CoarseNullSpace) { 5707 PetscBool isnull; 5708 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5709 if (0) { 5710 if (isbddc && !pcbddc->benign_saddle_point) { 5711 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5712 } else { 5713 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5714 } 5715 } else { 5716 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5717 } 5718 } 5719 /* setup coarse ksp */ 5720 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5721 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5722 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5723 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5724 KSP check_ksp; 5725 KSPType check_ksp_type; 5726 PC check_pc; 5727 Vec check_vec,coarse_vec; 5728 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5729 PetscInt its; 5730 PetscBool compute_eigs; 5731 PetscReal *eigs_r,*eigs_c; 5732 PetscInt neigs; 5733 const char *prefix; 5734 5735 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5736 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5737 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5738 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5739 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5740 if (ispreonly) { 5741 check_ksp_type = KSPPREONLY; 5742 compute_eigs = PETSC_FALSE; 5743 } else { 5744 check_ksp_type = KSPGMRES; 5745 compute_eigs = PETSC_TRUE; 5746 } 5747 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5748 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5749 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5750 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5751 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5752 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5753 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5754 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5755 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5756 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5757 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5758 /* create random vec */ 5759 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5760 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5761 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5762 if (CoarseNullSpace) { 5763 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5764 } 5765 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5766 /* solve coarse problem */ 5767 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5768 if (CoarseNullSpace) { 5769 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5770 } 5771 /* set eigenvalue estimation if preonly has not been requested */ 5772 if (compute_eigs) { 5773 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5774 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5775 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5776 lambda_max = eigs_r[neigs-1]; 5777 lambda_min = eigs_r[0]; 5778 if (pcbddc->use_coarse_estimates) { 5779 if (lambda_max>lambda_min) { 5780 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5781 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5782 } 5783 } 5784 } 5785 5786 /* check coarse problem residual error */ 5787 if (pcbddc->dbg_flag) { 5788 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5789 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5790 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5791 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5792 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5793 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5794 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5795 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5796 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5797 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5798 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5799 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5800 if (CoarseNullSpace) { 5801 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5802 } 5803 if (compute_eigs) { 5804 PetscReal lambda_max_s,lambda_min_s; 5805 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5806 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5807 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5808 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5809 for (i=0;i<neigs;i++) { 5810 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5811 } 5812 } 5813 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5814 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5815 } 5816 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5817 if (compute_eigs) { 5818 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5819 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5820 } 5821 } 5822 } 5823 /* print additional info */ 5824 if (pcbddc->dbg_flag) { 5825 /* waits until all processes reaches this point */ 5826 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5827 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5828 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5829 } 5830 5831 /* free memory */ 5832 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5833 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5834 PetscFunctionReturn(0); 5835 } 5836 5837 #undef __FUNCT__ 5838 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5839 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5840 { 5841 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5842 PC_IS* pcis = (PC_IS*)pc->data; 5843 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5844 IS subset,subset_mult,subset_n; 5845 PetscInt local_size,coarse_size=0; 5846 PetscInt *local_primal_indices=NULL; 5847 const PetscInt *t_local_primal_indices; 5848 PetscErrorCode ierr; 5849 5850 PetscFunctionBegin; 5851 /* Compute global number of coarse dofs */ 5852 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5854 } 5855 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5856 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5857 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5858 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5859 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5860 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5861 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5862 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5863 if (local_size != pcbddc->local_primal_size) { 5864 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5865 } 5866 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5867 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5868 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5869 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5870 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5871 5872 /* check numbering */ 5873 if (pcbddc->dbg_flag) { 5874 PetscScalar coarsesum,*array,*array2; 5875 PetscInt i; 5876 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5877 5878 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5880 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5881 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5882 /* counter */ 5883 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5884 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5885 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5886 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5887 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5888 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5889 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5890 for (i=0;i<pcbddc->local_primal_size;i++) { 5891 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5892 } 5893 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5894 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5895 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5896 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5897 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5898 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5899 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5900 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5901 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5902 for (i=0;i<pcis->n;i++) { 5903 if (array[i] != 0.0 && array[i] != array2[i]) { 5904 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5905 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5906 set_error = PETSC_TRUE; 5907 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5908 } 5909 } 5910 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5911 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5912 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5913 for (i=0;i<pcis->n;i++) { 5914 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5915 } 5916 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5917 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5918 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5919 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5920 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5921 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5922 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5923 PetscInt *gidxs; 5924 5925 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5926 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5927 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5928 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5929 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5930 for (i=0;i<pcbddc->local_primal_size;i++) { 5931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5932 } 5933 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5934 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5935 } 5936 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5937 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5938 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5939 } 5940 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5941 /* get back data */ 5942 *coarse_size_n = coarse_size; 5943 *local_primal_indices_n = local_primal_indices; 5944 PetscFunctionReturn(0); 5945 } 5946 5947 #undef __FUNCT__ 5948 #define __FUNCT__ "PCBDDCGlobalToLocal" 5949 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5950 { 5951 IS localis_t; 5952 PetscInt i,lsize,*idxs,n; 5953 PetscScalar *vals; 5954 PetscErrorCode ierr; 5955 5956 PetscFunctionBegin; 5957 /* get indices in local ordering exploiting local to global map */ 5958 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5959 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5960 for (i=0;i<lsize;i++) vals[i] = 1.0; 5961 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5962 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5963 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5964 if (idxs) { /* multilevel guard */ 5965 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5966 } 5967 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5968 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5969 ierr = PetscFree(vals);CHKERRQ(ierr); 5970 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5971 /* now compute set in local ordering */ 5972 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5973 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5974 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5975 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5976 for (i=0,lsize=0;i<n;i++) { 5977 if (PetscRealPart(vals[i]) > 0.5) { 5978 lsize++; 5979 } 5980 } 5981 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5982 for (i=0,lsize=0;i<n;i++) { 5983 if (PetscRealPart(vals[i]) > 0.5) { 5984 idxs[lsize++] = i; 5985 } 5986 } 5987 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5989 *localis = localis_t; 5990 PetscFunctionReturn(0); 5991 } 5992 5993 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5994 #undef __FUNCT__ 5995 #define __FUNCT__ "PCBDDCMatMult_Private" 5996 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5997 { 5998 PCBDDCChange_ctx change_ctx; 5999 PetscErrorCode ierr; 6000 6001 PetscFunctionBegin; 6002 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6003 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6004 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6005 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6006 PetscFunctionReturn(0); 6007 } 6008 6009 #undef __FUNCT__ 6010 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6011 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6012 { 6013 PCBDDCChange_ctx change_ctx; 6014 PetscErrorCode ierr; 6015 6016 PetscFunctionBegin; 6017 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6018 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6019 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6020 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6021 PetscFunctionReturn(0); 6022 } 6023 6024 #undef __FUNCT__ 6025 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6026 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6027 { 6028 PC_IS *pcis=(PC_IS*)pc->data; 6029 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6030 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6031 Mat S_j; 6032 PetscInt *used_xadj,*used_adjncy; 6033 PetscBool free_used_adj; 6034 PetscErrorCode ierr; 6035 6036 PetscFunctionBegin; 6037 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6038 free_used_adj = PETSC_FALSE; 6039 if (pcbddc->sub_schurs_layers == -1) { 6040 used_xadj = NULL; 6041 used_adjncy = NULL; 6042 } else { 6043 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6044 used_xadj = pcbddc->mat_graph->xadj; 6045 used_adjncy = pcbddc->mat_graph->adjncy; 6046 } else if (pcbddc->computed_rowadj) { 6047 used_xadj = pcbddc->mat_graph->xadj; 6048 used_adjncy = pcbddc->mat_graph->adjncy; 6049 } else { 6050 PetscBool flg_row=PETSC_FALSE; 6051 const PetscInt *xadj,*adjncy; 6052 PetscInt nvtxs; 6053 6054 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6055 if (flg_row) { 6056 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6057 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6058 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6059 free_used_adj = PETSC_TRUE; 6060 } else { 6061 pcbddc->sub_schurs_layers = -1; 6062 used_xadj = NULL; 6063 used_adjncy = NULL; 6064 } 6065 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6066 } 6067 } 6068 6069 /* setup sub_schurs data */ 6070 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6071 if (!sub_schurs->schur_explicit) { 6072 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6073 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6074 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6075 } else { 6076 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6077 PetscBool isseqaij; 6078 PetscInt benign_n; 6079 6080 if (!pcbddc->use_vertices && reuse_solvers) { 6081 PetscInt n_vertices; 6082 6083 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6084 reuse_solvers = (PetscBool)!n_vertices; 6085 } 6086 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6087 if (!isseqaij) { 6088 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6089 if (matis->A == pcbddc->local_mat) { 6090 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6091 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6092 } else { 6093 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6094 } 6095 } 6096 if (!pcbddc->benign_change_explicit) { 6097 benign_n = pcbddc->benign_n; 6098 } else { 6099 benign_n = 0; 6100 } 6101 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6102 } 6103 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6104 6105 /* free adjacency */ 6106 if (free_used_adj) { 6107 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6108 } 6109 PetscFunctionReturn(0); 6110 } 6111 6112 #undef __FUNCT__ 6113 #define __FUNCT__ "PCBDDCInitSubSchurs" 6114 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6115 { 6116 PC_IS *pcis=(PC_IS*)pc->data; 6117 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6118 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6119 PCBDDCGraph graph; 6120 PetscErrorCode ierr; 6121 6122 PetscFunctionBegin; 6123 /* attach interface graph for determining subsets */ 6124 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6125 IS verticesIS,verticescomm; 6126 PetscInt vsize,*idxs; 6127 6128 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6129 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6130 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6131 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6132 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6133 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6135 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6136 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6137 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6138 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6139 /* 6140 if (pcbddc->dbg_flag) { 6141 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6142 } 6143 */ 6144 } else { 6145 graph = pcbddc->mat_graph; 6146 } 6147 6148 /* sub_schurs init */ 6149 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6150 6151 /* free graph struct */ 6152 if (pcbddc->sub_schurs_rebuild) { 6153 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6154 } 6155 PetscFunctionReturn(0); 6156 } 6157 6158 #undef __FUNCT__ 6159 #define __FUNCT__ "PCBDDCCheckOperator" 6160 PetscErrorCode PCBDDCCheckOperator(PC pc) 6161 { 6162 PC_IS *pcis=(PC_IS*)pc->data; 6163 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6164 PetscErrorCode ierr; 6165 6166 PetscFunctionBegin; 6167 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6168 IS zerodiag = NULL; 6169 Mat S_j,B0_B=NULL; 6170 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6171 PetscScalar *p0_check,*array,*array2; 6172 PetscReal norm; 6173 PetscInt i; 6174 6175 /* B0 and B0_B */ 6176 if (zerodiag) { 6177 IS dummy; 6178 6179 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6180 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6181 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6182 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6183 } 6184 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6185 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6186 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6187 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6188 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6189 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6190 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6191 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6192 /* S_j */ 6193 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6194 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6195 6196 /* mimic vector in \widetilde{W}_\Gamma */ 6197 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6198 /* continuous in primal space */ 6199 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6200 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6201 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6202 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6203 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6204 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6205 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6206 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6207 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6208 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6209 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6210 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6211 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6212 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6213 6214 /* assemble rhs for coarse problem */ 6215 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6216 /* local with Schur */ 6217 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6218 if (zerodiag) { 6219 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6220 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6221 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6222 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6223 } 6224 /* sum on primal nodes the local contributions */ 6225 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6226 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6227 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6228 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6229 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6230 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6231 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6232 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6233 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6234 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6235 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6236 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6237 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6238 /* scale primal nodes (BDDC sums contibutions) */ 6239 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6240 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6241 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6242 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6243 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6244 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6246 /* global: \widetilde{B0}_B w_\Gamma */ 6247 if (zerodiag) { 6248 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6249 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6250 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6251 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6252 } 6253 /* BDDC */ 6254 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6255 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6256 6257 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6258 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6259 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6260 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6261 for (i=0;i<pcbddc->benign_n;i++) { 6262 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6263 } 6264 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6265 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6266 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6267 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6268 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6269 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6270 } 6271 PetscFunctionReturn(0); 6272 } 6273