1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 290 if (!isseqaij && filter) { 291 PetscBool isseqdense; 292 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 294 if (!isseqdense) { 295 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 296 } else { /* TODO: rectangular case and LDA */ 297 PetscScalar *array; 298 PetscReal chop=1.e-6; 299 300 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 301 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 302 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 303 for (i=0;i<n;i++) { 304 PetscInt j; 305 for (j=i+1;j<n;j++) { 306 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 307 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 308 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 309 } 310 } 311 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 312 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 313 } 314 } else { 315 B = A; 316 } 317 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 318 319 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 320 if (filter) { 321 PetscScalar *data; 322 PetscInt j,cum; 323 324 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 325 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 326 cum = 0; 327 for (i=0;i<n;i++) { 328 PetscInt t; 329 330 for (j=xadj[i];j<xadj[i+1];j++) { 331 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 332 continue; 333 } 334 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 335 } 336 t = xadj_filtered[i]; 337 xadj_filtered[i] = cum; 338 cum += t; 339 } 340 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 341 } else { 342 xadj_filtered = NULL; 343 adjncy_filtered = NULL; 344 } 345 346 /* compute local connected components using PCBDDCGraph */ 347 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 349 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 350 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 351 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 353 if (xadj_filtered) { 354 graph->xadj = xadj_filtered; 355 graph->adjncy = adjncy_filtered; 356 } else { 357 graph->xadj = xadj; 358 graph->adjncy = adjncy; 359 } 360 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 361 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 362 /* partial clean up */ 363 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 364 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 365 if (A != B) { 366 ierr = MatDestroy(&B);CHKERRQ(ierr); 367 } 368 369 /* get back data */ 370 if (ncc) *ncc = graph->ncc; 371 if (cc) { 372 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 373 for (i=0;i<graph->ncc;i++) { 374 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 375 } 376 *cc = cc_n; 377 } 378 /* clean up graph */ 379 graph->xadj = 0; 380 graph->adjncy = 0; 381 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCBenignCheck" 387 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 388 { 389 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 390 PC_IS* pcis = (PC_IS*)(pc->data); 391 IS dirIS = NULL; 392 PetscInt i; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 397 if (zerodiag) { 398 Mat A; 399 Vec vec3_N; 400 PetscScalar *vals; 401 const PetscInt *idxs; 402 PetscInt nz,*count; 403 404 /* p0 */ 405 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 406 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 407 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 408 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 409 for (i=0;i<nz;i++) vals[i] = 1.; 410 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 412 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 413 /* v_I */ 414 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 415 for (i=0;i<nz;i++) vals[i] = 0.; 416 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 417 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 419 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 if (dirIS) { 423 PetscInt n; 424 425 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 426 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 427 for (i=0;i<n;i++) vals[i] = 0.; 428 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 430 } 431 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 433 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 434 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 435 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 436 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 437 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 438 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 439 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 440 } 441 ierr = PetscFree(vals);CHKERRQ(ierr); 442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 443 444 /* there should not be any pressure dofs lying on the interface */ 445 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 446 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 447 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 448 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 449 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 450 for (i=0;i<nz;i++) { 451 if (count[idxs[i]]) { 452 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 453 } 454 } 455 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 456 ierr = PetscFree(count);CHKERRQ(ierr); 457 } 458 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 459 460 /* check PCBDDCBenignGetOrSetP0 */ 461 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 462 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 463 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 464 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 465 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) { 467 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 468 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 469 } 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 476 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 477 { 478 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 479 IS pressures,zerodiag,*zerodiag_subs; 480 PetscInt nz,n; 481 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 482 PetscErrorCode ierr; 483 484 PetscFunctionBegin; 485 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 486 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 487 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 488 for (n=0;n<pcbddc->benign_n;n++) { 489 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 490 } 491 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 492 pcbddc->benign_n = 0; 493 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 494 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 495 Checks if all the pressure dofs in each subdomain have a zero diagonal 496 If not, a change of basis on pressures is not needed 497 since the local Schur complements are SPD 498 */ 499 has_null_pressures = PETSC_TRUE; 500 have_null = PETSC_TRUE; 501 if (pcbddc->n_ISForDofsLocal) { 502 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 503 504 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 505 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 506 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 507 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 508 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 510 if (!sorted) { 511 ierr = ISSort(pressures);CHKERRQ(ierr); 512 } 513 } else { 514 pressures = NULL; 515 } 516 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 517 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 518 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 519 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 520 if (!sorted) { 521 ierr = ISSort(zerodiag);CHKERRQ(ierr); 522 } 523 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 524 if (!nz) { 525 if (n) have_null = PETSC_FALSE; 526 has_null_pressures = PETSC_FALSE; 527 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 528 } 529 recompute_zerodiag = PETSC_FALSE; 530 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 531 zerodiag_subs = NULL; 532 pcbddc->benign_n = 0; 533 if (has_null_pressures) { 534 IS *subs; 535 PetscInt nsubs,i; 536 537 subs = pcbddc->local_subs; 538 nsubs = pcbddc->n_local_subs; 539 if (nsubs > 1) { 540 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 541 for (i=0;i<nsubs;i++) { 542 ISLocalToGlobalMapping l2g; 543 IS t_zerodiag_subs; 544 PetscInt nl; 545 546 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 547 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 548 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 549 if (nl) { 550 PetscBool valid = PETSC_TRUE; 551 552 if (pressures) { 553 IS t_pressure_subs; 554 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 555 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 556 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 557 } 558 if (valid) { 559 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 560 pcbddc->benign_n++; 561 } else { 562 recompute_zerodiag = PETSC_TRUE; 563 } 564 } 565 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 566 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 567 } 568 } else { /* there's just one subdomain (or zero if they have not been detected */ 569 PetscBool valid = PETSC_TRUE; 570 if (pressures) { 571 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 572 } 573 if (valid) { 574 pcbddc->benign_n = 1; 575 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 576 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 577 zerodiag_subs[0] = zerodiag; 578 } 579 } 580 } 581 582 if (!pcbddc->benign_n) { 583 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 584 recompute_zerodiag = PETSC_FALSE; 585 has_null_pressures = PETSC_FALSE; 586 have_null = PETSC_FALSE; 587 } 588 589 /* final check for null pressures */ 590 if (zerodiag && pressures) { 591 PetscInt nz,np; 592 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 593 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 594 if (nz != np) have_null = PETSC_FALSE; 595 } 596 597 if (recompute_zerodiag) { 598 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 599 if (pcbddc->benign_n == 1) { 600 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 601 zerodiag = zerodiag_subs[0]; 602 } else { 603 PetscInt i,nzn,*new_idxs; 604 605 nzn = 0; 606 for (i=0;i<pcbddc->benign_n;i++) { 607 PetscInt ns; 608 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 609 nzn += ns; 610 } 611 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 612 nzn = 0; 613 for (i=0;i<pcbddc->benign_n;i++) { 614 PetscInt ns,*idxs; 615 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 616 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 617 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 618 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 nzn += ns; 620 } 621 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 622 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 623 } 624 have_null = PETSC_FALSE; 625 } 626 627 if (has_null_pressures) { 628 IS zerodiagc; 629 const PetscInt *idxs,*idxsc; 630 PetscInt i,s,*nnz; 631 632 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 633 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 634 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 635 /* local change of basis for pressures */ 636 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 637 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 638 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 639 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 640 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 641 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 642 for (i=0;i<pcbddc->benign_n;i++) { 643 PetscInt nzs,j; 644 645 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 646 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 647 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 648 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 649 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 650 } 651 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 652 ierr = PetscFree(nnz);CHKERRQ(ierr); 653 /* set identity on velocities */ 654 for (i=0;i<n-nz;i++) { 655 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 656 } 657 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 658 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 659 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 660 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 661 /* set change on pressures */ 662 for (s=0;s<pcbddc->benign_n;s++) { 663 PetscScalar *array; 664 PetscInt nzs; 665 666 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 667 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 668 for (i=0;i<nzs-1;i++) { 669 PetscScalar vals[2]; 670 PetscInt cols[2]; 671 672 cols[0] = idxs[i]; 673 cols[1] = idxs[nzs-1]; 674 vals[0] = 1.; 675 vals[1] = 1.; 676 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 677 } 678 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 679 for (i=0;i<nzs-1;i++) array[i] = -1.; 680 array[nzs-1] = 1.; 681 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 682 /* store local idxs for p0 */ 683 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 684 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 685 ierr = PetscFree(array);CHKERRQ(ierr); 686 } 687 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 688 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 689 /* project if needed */ 690 if (pcbddc->benign_change_explicit) { 691 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 692 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 693 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 695 } 696 /* store global idxs for p0 */ 697 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 698 } 699 pcbddc->benign_zerodiag_subs = zerodiag_subs; 700 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 701 702 /* determines if the coarse solver will be singular or not */ 703 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 704 *zerodiaglocal = zerodiag; 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 710 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 711 { 712 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 if (!pcbddc->benign_sf) { 717 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 718 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 719 } 720 if (get) { /* use SF to get values */ 721 PetscScalar *array; 722 723 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 724 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 725 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 726 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 727 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 728 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 729 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 730 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 731 } 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 737 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 738 { 739 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 740 PetscErrorCode ierr; 741 742 PetscFunctionBegin; 743 /* TODO: add error checking 744 - avoid nested pop (or push) calls. 745 - cannot push before pop. 746 - cannot call this if pcbddc->local_mat is NULL 747 */ 748 if (!pcbddc->benign_n) { 749 PetscFunctionReturn(0); 750 } 751 if (pop) { 752 if (pcbddc->benign_change_explicit) { 753 IS is_p0; 754 MatReuse reuse; 755 756 /* extract B_0 */ 757 reuse = MAT_INITIAL_MATRIX; 758 if (pcbddc->benign_B0) { 759 reuse = MAT_REUSE_MATRIX; 760 } 761 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 762 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 763 /* remove rows and cols from local problem */ 764 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 765 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 766 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 767 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 768 } else { 769 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 770 PetscScalar *vals; 771 PetscInt i,n,*idxs_ins; 772 773 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 774 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 775 if (!pcbddc->benign_B0) { 776 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 777 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 778 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 779 /* this matrix is very sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 780 Setting nnz=100 should be more than enough */ 781 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 782 } 783 784 for (i=0;i<pcbddc->benign_n;i++) { 785 PetscScalar *array; 786 PetscInt *idxs,j,nz,cum; 787 788 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 789 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 790 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 791 for (j=0;j<nz;j++) vals[j] = 1.; 792 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 793 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 794 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 795 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 796 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 797 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 798 cum = 0; 799 for (j=0;j<n;j++) { 800 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 801 vals[cum] = array[j]; 802 idxs_ins[cum] = j; 803 cum++; 804 } 805 } 806 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 807 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 808 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 809 } 810 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 811 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 812 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 813 } 814 } else { /* push */ 815 if (pcbddc->benign_change_explicit) { 816 PetscInt i; 817 818 for (i=0;i<pcbddc->benign_n;i++) { 819 PetscScalar *B0_vals; 820 PetscInt *B0_cols,B0_ncol; 821 822 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 823 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 824 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 825 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 826 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 827 } 828 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 } else { 831 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 832 } 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "PCBDDCAdaptiveSelection" 839 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 840 { 841 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 842 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 843 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 844 PetscBLASInt *B_iwork,*B_ifail; 845 PetscScalar *work,lwork; 846 PetscScalar *St,*S,*eigv; 847 PetscScalar *Sarray,*Starray; 848 PetscReal *eigs,thresh; 849 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 850 PetscBool allocated_S_St; 851 #if defined(PETSC_USE_COMPLEX) 852 PetscReal *rwork; 853 #endif 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 if (!sub_schurs->schur_explicit) { 858 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 859 } 860 861 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 862 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 863 } 864 865 if (pcbddc->dbg_flag) { 866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 870 } 871 872 if (pcbddc->dbg_flag) { 873 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 874 } 875 876 /* max size of subsets */ 877 mss = 0; 878 for (i=0;i<sub_schurs->n_subs;i++) { 879 PetscInt subset_size; 880 881 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 882 mss = PetscMax(mss,subset_size); 883 } 884 885 /* min/max and threshold */ 886 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 887 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 888 nmax = PetscMax(nmin,nmax); 889 allocated_S_St = PETSC_FALSE; 890 if (nmin) { 891 allocated_S_St = PETSC_TRUE; 892 } 893 894 /* allocate lapack workspace */ 895 cum = cum2 = 0; 896 maxneigs = 0; 897 for (i=0;i<sub_schurs->n_subs;i++) { 898 PetscInt n,subset_size; 899 900 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 901 n = PetscMin(subset_size,nmax); 902 cum += subset_size; 903 cum2 += subset_size*n; 904 maxneigs = PetscMax(maxneigs,n); 905 } 906 if (mss) { 907 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 908 PetscBLASInt B_itype = 1; 909 PetscBLASInt B_N = mss; 910 PetscReal zero = 0.0; 911 PetscReal eps = 0.0; /* dlamch? */ 912 913 B_lwork = -1; 914 S = NULL; 915 St = NULL; 916 eigs = NULL; 917 eigv = NULL; 918 B_iwork = NULL; 919 B_ifail = NULL; 920 #if defined(PETSC_USE_COMPLEX) 921 rwork = NULL; 922 #endif 923 thresh = 1.0; 924 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 925 #if defined(PETSC_USE_COMPLEX) 926 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 927 #else 928 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 929 #endif 930 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 931 ierr = PetscFPTrapPop();CHKERRQ(ierr); 932 } else { 933 /* TODO */ 934 } 935 } else { 936 lwork = 0; 937 } 938 939 nv = 0; 940 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 941 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 942 } 943 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 944 if (allocated_S_St) { 945 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 946 } 947 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 948 #if defined(PETSC_USE_COMPLEX) 949 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 950 #endif 951 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 952 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 953 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 954 nv+cum,&pcbddc->adaptive_constraints_idxs, 955 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 956 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 957 958 maxneigs = 0; 959 cum = cum2 = cumarray = 0; 960 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 961 pcbddc->adaptive_constraints_data_ptr[0] = 0; 962 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 963 const PetscInt *idxs; 964 965 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 966 for (cum=0;cum<nv;cum++) { 967 pcbddc->adaptive_constraints_n[cum] = 1; 968 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 969 pcbddc->adaptive_constraints_data[cum] = 1.0; 970 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 971 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 972 } 973 cum2 = cum; 974 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 975 } 976 977 if (mss) { /* multilevel */ 978 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 979 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 980 } 981 982 thresh = pcbddc->adaptive_threshold; 983 for (i=0;i<sub_schurs->n_subs;i++) { 984 const PetscInt *idxs; 985 PetscReal infty = PETSC_MAX_REAL; 986 PetscInt j,subset_size,eigs_start = 0; 987 PetscBLASInt B_N; 988 PetscBool same_data = PETSC_FALSE; 989 990 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 991 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 992 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 993 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 994 if (sub_schurs->is_hermitian) { 995 PetscInt j,k; 996 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 997 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 998 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 999 } 1000 for (j=0;j<subset_size;j++) { 1001 for (k=j;k<subset_size;k++) { 1002 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1003 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1004 } 1005 } 1006 } else { 1007 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1008 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1009 } 1010 } else { 1011 S = Sarray + cumarray; 1012 St = Starray + cumarray; 1013 } 1014 1015 /* see if we can save some work */ 1016 if (sub_schurs->n_subs == 1) { 1017 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1018 } 1019 1020 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1021 B_neigs = 0; 1022 } else { 1023 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1024 PetscBLASInt B_itype = 1; 1025 PetscBLASInt B_IL, B_IU; 1026 PetscReal eps = -1.0; /* dlamch? */ 1027 PetscInt nmin_s; 1028 1029 if (pcbddc->dbg_flag) { 1030 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1031 } 1032 1033 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1034 if (thresh > 1.+PETSC_SMALL) { 1035 1036 /* ask for eigenvalues larger than thresh */ 1037 #if defined(PETSC_USE_COMPLEX) 1038 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1039 #else 1040 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1041 #endif 1042 } else { 1043 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1044 B_IL = 1; 1045 #if defined(PETSC_USE_COMPLEX) 1046 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1047 #else 1048 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1049 #endif 1050 } 1051 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1052 if (B_ierr) { 1053 if (B_ierr < 0 ) { 1054 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1055 } else if (B_ierr <= B_N) { 1056 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1057 } else { 1058 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1059 } 1060 } 1061 1062 if (B_neigs > nmax) { 1063 if (pcbddc->dbg_flag) { 1064 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1065 } 1066 eigs_start = B_neigs -nmax; 1067 B_neigs = nmax; 1068 } 1069 1070 nmin_s = PetscMin(nmin,B_N); 1071 if (B_neigs < nmin_s) { 1072 PetscBLASInt B_neigs2; 1073 1074 B_IU = B_N - B_neigs; 1075 B_IL = B_N - nmin_s + 1; 1076 if (pcbddc->dbg_flag) { 1077 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1078 } 1079 if (sub_schurs->is_hermitian) { 1080 PetscInt j; 1081 for (j=0;j<subset_size;j++) { 1082 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } 1084 for (j=0;j<subset_size;j++) { 1085 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1086 } 1087 } else { 1088 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1089 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1090 } 1091 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1092 #if defined(PETSC_USE_COMPLEX) 1093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1094 #else 1095 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1096 #endif 1097 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1098 B_neigs += B_neigs2; 1099 } 1100 if (B_ierr) { 1101 if (B_ierr < 0 ) { 1102 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1103 } else if (B_ierr <= B_N) { 1104 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1105 } else { 1106 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1107 } 1108 } 1109 if (pcbddc->dbg_flag) { 1110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1111 for (j=0;j<B_neigs;j++) { 1112 if (eigs[j] == 0.0) { 1113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1114 } else { 1115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1116 } 1117 } 1118 } 1119 } else { 1120 /* TODO */ 1121 } 1122 } 1123 maxneigs = PetscMax(B_neigs,maxneigs); 1124 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1125 if (B_neigs) { 1126 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1127 1128 if (pcbddc->dbg_flag > 1) { 1129 PetscInt ii; 1130 for (ii=0;ii<B_neigs;ii++) { 1131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1132 for (j=0;j<B_N;j++) { 1133 #if defined(PETSC_USE_COMPLEX) 1134 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1135 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1137 #else 1138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1139 #endif 1140 } 1141 } 1142 } 1143 #if 0 1144 for (j=0;j<B_neigs;j++) { 1145 PetscBLASInt Blas_N,Blas_one = 1.0; 1146 PetscScalar norm; 1147 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1148 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1149 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1150 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1151 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1152 } else { 1153 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1154 } 1155 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1156 } 1157 #endif 1158 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1159 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1160 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1161 cum++; 1162 } 1163 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1164 /* shift for next computation */ 1165 cumarray += subset_size*subset_size; 1166 } 1167 if (pcbddc->dbg_flag) { 1168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1169 } 1170 1171 if (mss) { 1172 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1173 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1174 /* destroy matrices (junk) */ 1175 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1176 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1177 } 1178 if (allocated_S_St) { 1179 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1180 } 1181 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1182 #if defined(PETSC_USE_COMPLEX) 1183 ierr = PetscFree(rwork);CHKERRQ(ierr); 1184 #endif 1185 if (pcbddc->dbg_flag) { 1186 PetscInt maxneigs_r; 1187 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1188 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "PCBDDCSetUpSolvers" 1195 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1196 { 1197 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1198 PetscScalar *coarse_submat_vals; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1203 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1204 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1205 1206 /* Setup local neumann solver ksp_R */ 1207 /* PCBDDCSetUpLocalScatters should be called first! */ 1208 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1209 1210 /* Change global null space passed in by the user if change of basis has been requested */ 1211 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1212 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1213 } 1214 1215 /* 1216 Setup local correction and local part of coarse basis. 1217 Gives back the dense local part of the coarse matrix in column major ordering 1218 */ 1219 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1220 1221 /* Compute total number of coarse nodes and setup coarse solver */ 1222 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1223 1224 /* free */ 1225 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "PCBDDCResetCustomization" 1231 PetscErrorCode PCBDDCResetCustomization(PC pc) 1232 { 1233 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1234 PetscErrorCode ierr; 1235 1236 PetscFunctionBegin; 1237 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1238 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1239 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1240 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1241 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1242 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1243 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1244 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1245 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1246 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1247 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1248 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "PCBDDCResetTopography" 1254 PetscErrorCode PCBDDCResetTopography(PC pc) 1255 { 1256 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1257 PetscInt i; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1262 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1263 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1264 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1265 for (i=0;i<pcbddc->n_local_subs;i++) { 1266 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1269 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "PCBDDCResetSolvers" 1275 PetscErrorCode PCBDDCResetSolvers(PC pc) 1276 { 1277 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1282 if (pcbddc->coarse_phi_B) { 1283 PetscScalar *array; 1284 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1285 ierr = PetscFree(array);CHKERRQ(ierr); 1286 } 1287 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1288 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1291 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1292 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1293 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1294 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1295 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1296 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1297 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1298 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1299 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1300 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1301 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1302 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1303 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1304 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1305 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1306 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1307 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1308 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1309 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1310 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1311 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1312 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1313 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1314 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1315 if (pcbddc->benign_zerodiag_subs) { 1316 PetscInt i; 1317 for (i=0;i<pcbddc->benign_n;i++) { 1318 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1319 } 1320 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1321 } 1322 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1328 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1329 { 1330 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1331 PC_IS *pcis = (PC_IS*)pc->data; 1332 VecType impVecType; 1333 PetscInt n_constraints,n_R,old_size; 1334 PetscErrorCode ierr; 1335 1336 PetscFunctionBegin; 1337 if (!pcbddc->ConstraintMatrix) { 1338 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1339 } 1340 /* get sizes */ 1341 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1342 n_R = pcis->n - pcbddc->n_vertices; 1343 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1344 /* local work vectors (try to avoid unneeded work)*/ 1345 /* R nodes */ 1346 old_size = -1; 1347 if (pcbddc->vec1_R) { 1348 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1349 } 1350 if (n_R != old_size) { 1351 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1352 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1353 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1354 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1355 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1356 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1357 } 1358 /* local primal dofs */ 1359 old_size = -1; 1360 if (pcbddc->vec1_P) { 1361 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1362 } 1363 if (pcbddc->local_primal_size != old_size) { 1364 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1365 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1366 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1367 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1368 } 1369 /* local explicit constraints */ 1370 old_size = -1; 1371 if (pcbddc->vec1_C) { 1372 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1373 } 1374 if (n_constraints && n_constraints != old_size) { 1375 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1376 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1377 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1378 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "PCBDDCSetUpCorrection" 1385 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1386 { 1387 PetscErrorCode ierr; 1388 /* pointers to pcis and pcbddc */ 1389 PC_IS* pcis = (PC_IS*)pc->data; 1390 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1391 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1392 /* submatrices of local problem */ 1393 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1394 /* submatrices of benign trick */ 1395 Mat B0_V = NULL; 1396 /* submatrices of local coarse problem */ 1397 Mat S_VV,S_CV,S_VC,S_CC; 1398 /* working matrices */ 1399 Mat C_CR; 1400 /* additional working stuff */ 1401 PC pc_R; 1402 Mat F; 1403 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1404 1405 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1406 PetscScalar *work; 1407 PetscInt *idx_V_B; 1408 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1409 PetscInt i,n_R,n_D,n_B; 1410 1411 /* some shortcuts to scalars */ 1412 PetscScalar one=1.0,m_one=-1.0; 1413 1414 PetscFunctionBegin; 1415 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1416 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1417 } 1418 1419 /* Set Non-overlapping dimensions */ 1420 n_vertices = pcbddc->n_vertices; 1421 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1422 n_B = pcis->n_B; 1423 n_D = pcis->n - n_B; 1424 n_R = pcis->n - n_vertices; 1425 1426 /* vertices in boundary numbering */ 1427 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1428 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1429 if (i != n_vertices) { 1430 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1431 } 1432 1433 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1434 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1435 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1436 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1437 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1438 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1440 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1442 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1443 1444 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1445 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1446 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1447 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1448 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1449 lda_rhs = n_R; 1450 need_benign_correction = PETSC_FALSE; 1451 if (isLU || isILU || isCHOL) { 1452 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1453 } else if (sub_schurs->reuse_solver) { 1454 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1455 MatFactorType type; 1456 1457 F = reuse_solver->F; 1458 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1459 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1460 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1461 need_benign_correction = !!reuse_solver->benign_n; 1462 } else { 1463 F = NULL; 1464 } 1465 1466 /* allocate workspace */ 1467 n = 0; 1468 if (n_constraints) { 1469 n += lda_rhs*n_constraints; 1470 } 1471 if (n_vertices) { 1472 n = PetscMax(2*lda_rhs*n_vertices,n); 1473 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1474 } 1475 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1476 1477 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1478 if (n_constraints) { 1479 Mat M1,M2,M3; 1480 Mat auxmat; 1481 IS is_aux; 1482 PetscScalar *array,*array2; 1483 1484 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1485 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1486 1487 /* Extract constraints on R nodes: C_{CR} */ 1488 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1489 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1490 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1491 1492 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1493 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1494 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1495 for (i=0;i<n_constraints;i++) { 1496 const PetscScalar *row_cmat_values; 1497 const PetscInt *row_cmat_indices; 1498 PetscInt size_of_constraint,j; 1499 1500 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1501 for (j=0;j<size_of_constraint;j++) { 1502 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1503 } 1504 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1505 } 1506 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1507 if (F) { 1508 Mat B; 1509 1510 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1511 if (need_benign_correction) { 1512 PetscScalar *marr; 1513 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1514 1515 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1516 for (i=0;i<n_constraints;i++) { 1517 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1518 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1519 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1520 } 1521 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1522 } 1523 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1524 if (need_benign_correction) { 1525 PetscScalar *marr; 1526 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1527 1528 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1529 for (i=0;i<n_constraints;i++) { 1530 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1531 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1532 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1533 } 1534 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1535 } 1536 ierr = MatDestroy(&B);CHKERRQ(ierr); 1537 } else { 1538 PetscScalar *marr; 1539 1540 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1541 for (i=0;i<n_constraints;i++) { 1542 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1543 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1544 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1545 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1549 } 1550 if (!pcbddc->switch_static) { 1551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1552 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1553 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1554 for (i=0;i<n_constraints;i++) { 1555 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1556 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1560 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1561 } 1562 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1563 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1564 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1565 } else { 1566 if (lda_rhs != n_R) { 1567 IS dummy; 1568 1569 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1570 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1571 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1572 } else { 1573 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1574 pcbddc->local_auxmat2 = local_auxmat2_R; 1575 } 1576 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1577 } 1578 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1579 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1580 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1581 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1582 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1583 if (isCHOL) { 1584 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1585 } else { 1586 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1587 } 1588 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1589 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1590 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1591 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1592 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1593 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1594 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1595 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1596 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1597 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1598 } 1599 /* Get submatrices from subdomain matrix */ 1600 if (pcbddc->benign_n) { 1601 IS dummy; 1602 Mat B0_R; 1603 PetscReal norm; 1604 1605 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1607 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1608 if (norm > PETSC_SMALL) { 1609 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1610 } 1611 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1612 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1613 } 1614 1615 if (n_vertices) { 1616 IS is_aux; 1617 1618 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1619 IS tis; 1620 1621 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1622 ierr = ISSort(tis);CHKERRQ(ierr); 1623 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1624 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1625 } else { 1626 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1627 } 1628 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1629 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1630 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1631 if (pcbddc->benign_n) { 1632 IS dummy; 1633 1634 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1635 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1636 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1637 } 1638 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1639 } 1640 1641 /* Matrix of coarse basis functions (local) */ 1642 if (pcbddc->coarse_phi_B) { 1643 PetscInt on_B,on_primal,on_D=n_D; 1644 if (pcbddc->coarse_phi_D) { 1645 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1646 } 1647 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1648 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1649 PetscScalar *marray; 1650 1651 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1652 ierr = PetscFree(marray);CHKERRQ(ierr); 1653 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1654 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1655 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1656 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1657 } 1658 } 1659 1660 if (!pcbddc->coarse_phi_B) { 1661 PetscScalar *marray; 1662 1663 n = n_B*pcbddc->local_primal_size; 1664 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1665 n += n_D*pcbddc->local_primal_size; 1666 } 1667 if (!pcbddc->symmetric_primal) { 1668 n *= 2; 1669 } 1670 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1672 n = n_B*pcbddc->local_primal_size; 1673 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1674 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1675 n += n_D*pcbddc->local_primal_size; 1676 } 1677 if (!pcbddc->symmetric_primal) { 1678 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1679 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1680 n = n_B*pcbddc->local_primal_size; 1681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1682 } 1683 } else { 1684 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1685 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1686 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1687 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1688 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1689 } 1690 } 1691 } 1692 1693 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1694 p0_lidx_I = NULL; 1695 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1696 const PetscInt *idxs; 1697 1698 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1699 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1700 for (i=0;i<pcbddc->benign_n;i++) { 1701 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1702 } 1703 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1704 } 1705 1706 /* vertices */ 1707 if (n_vertices) { 1708 1709 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1710 1711 if (n_R) { 1712 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1713 PetscBLASInt B_N,B_one = 1; 1714 PetscScalar *x,*y; 1715 PetscBool isseqaij; 1716 1717 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1718 if (need_benign_correction) { 1719 ISLocalToGlobalMapping RtoN; 1720 IS is_p0; 1721 PetscInt *idxs_p0,n; 1722 1723 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1724 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1725 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1726 if (n != pcbddc->benign_n) { 1727 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1728 } 1729 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1730 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1732 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1733 } 1734 1735 if (lda_rhs == n_R) { 1736 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1737 } else { 1738 PetscScalar *av,*array; 1739 const PetscInt *xadj,*adjncy; 1740 PetscInt n; 1741 PetscBool flg_row; 1742 1743 array = work+lda_rhs*n_vertices; 1744 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1745 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1746 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1747 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1748 for (i=0;i<n;i++) { 1749 PetscInt j; 1750 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1751 } 1752 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1753 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1755 } 1756 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1757 if (F) { 1758 if (need_benign_correction) { 1759 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1760 PetscScalar *marr; 1761 1762 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1763 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1764 1765 | 0 0 0 | (V) 1766 L = | 0 0 -1 | (P-p0) 1767 | 0 0 -1 | (p0) 1768 1769 */ 1770 for (i=0;i<reuse_solver->benign_n;i++) { 1771 const PetscScalar *vals; 1772 const PetscInt *idxs,*idxs_zero; 1773 PetscInt n,j,nz; 1774 1775 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1776 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1777 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1778 for (j=0;j<n;j++) { 1779 PetscScalar val = vals[j]; 1780 PetscInt k,col = idxs[j]; 1781 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1782 } 1783 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1784 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1785 } 1786 /* need to correct the rhs */ 1787 for (i=0;i<n_vertices;i++) { 1788 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1789 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1790 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1791 } 1792 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1793 } 1794 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1795 /* need to correct the solution */ 1796 if (need_benign_correction) { 1797 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1798 PetscScalar *marr; 1799 1800 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1801 for (i=0;i<n_vertices;i++) { 1802 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1803 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1805 } 1806 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1807 } 1808 } else { 1809 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1810 for (i=0;i<n_vertices;i++) { 1811 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1812 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1813 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1814 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1815 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1816 } 1817 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1818 } 1819 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1820 /* S_VV and S_CV */ 1821 if (n_constraints) { 1822 Mat B; 1823 1824 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1825 for (i=0;i<n_vertices;i++) { 1826 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1827 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1828 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1834 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1835 ierr = MatDestroy(&B);CHKERRQ(ierr); 1836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1837 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1838 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1839 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1840 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1841 ierr = MatDestroy(&B);CHKERRQ(ierr); 1842 } 1843 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1844 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1845 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1846 } 1847 if (lda_rhs != n_R) { 1848 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1850 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1851 } 1852 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1853 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1854 if (need_benign_correction) { 1855 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1856 PetscScalar *marr,*sums; 1857 1858 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1859 ierr = MatDenseGetArray(S_VVt,&marr); 1860 for (i=0;i<reuse_solver->benign_n;i++) { 1861 const PetscScalar *vals; 1862 const PetscInt *idxs,*idxs_zero; 1863 PetscInt n,j,nz; 1864 1865 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1866 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1867 for (j=0;j<n_vertices;j++) { 1868 PetscInt k; 1869 sums[j] = 0.; 1870 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1871 } 1872 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 for (j=0;j<n;j++) { 1874 PetscScalar val = vals[j]; 1875 PetscInt k; 1876 for (k=0;k<n_vertices;k++) { 1877 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1878 } 1879 } 1880 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1881 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1882 } 1883 ierr = PetscFree(sums);CHKERRQ(ierr); 1884 ierr = MatDenseRestoreArray(S_VVt,&marr); 1885 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1886 } 1887 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1888 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1889 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1890 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1891 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1892 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1893 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1894 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1895 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1896 } else { 1897 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1898 } 1899 for (i=0;i<pcbddc->benign_n;i++) { 1900 const PetscScalar *vals; 1901 const PetscInt *idxs; 1902 PetscInt n,j,primal_idx; 1903 1904 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1905 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1906 for (j=0;j<n;j++) { 1907 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1908 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1909 } 1910 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1911 } 1912 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1913 1914 /* coarse basis functions */ 1915 for (i=0;i<n_vertices;i++) { 1916 PetscScalar *y; 1917 1918 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1919 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1920 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1921 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 y[n_B*i+idx_V_B[i]] = 1.0; 1924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1925 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1926 1927 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1928 PetscInt j; 1929 1930 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1931 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1932 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1933 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1934 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1935 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1936 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1937 } 1938 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1939 } 1940 /* if n_R == 0 the object is not destroyed */ 1941 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1942 } 1943 1944 if (n_constraints) { 1945 Mat B; 1946 1947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1948 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1949 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1950 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1951 if (n_vertices) { 1952 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1953 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1954 } else { 1955 Mat S_VCt; 1956 1957 if (lda_rhs != n_R) { 1958 ierr = MatDestroy(&B);CHKERRQ(ierr); 1959 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1960 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1961 } 1962 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1963 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1964 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1965 } 1966 } 1967 ierr = MatDestroy(&B);CHKERRQ(ierr); 1968 /* coarse basis functions */ 1969 for (i=0;i<n_constraints;i++) { 1970 PetscScalar *y; 1971 1972 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1973 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1974 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1975 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1977 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1978 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1979 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1980 PetscInt j; 1981 1982 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1983 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1984 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1985 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1987 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1988 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1989 } 1990 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1991 } 1992 } 1993 if (n_constraints) { 1994 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1995 } 1996 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1997 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1998 1999 /* compute other basis functions for non-symmetric problems */ 2000 if (!pcbddc->symmetric_primal) { 2001 Mat B_V=NULL,B_C=NULL; 2002 PetscScalar *marray; 2003 2004 if (n_constraints) { 2005 Mat S_CCT,C_CRT; 2006 2007 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2008 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2009 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2010 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2011 if (n_vertices) { 2012 Mat S_VCT; 2013 2014 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2015 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2016 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2017 } 2018 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2019 } 2020 if (n_vertices && n_R) { 2021 PetscScalar *av,*marray; 2022 const PetscInt *xadj,*adjncy; 2023 PetscInt n; 2024 PetscBool flg_row; 2025 2026 /* B_V = B_V - A_VR^T */ 2027 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2028 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2029 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2030 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2031 for (i=0;i<n;i++) { 2032 PetscInt j; 2033 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2034 } 2035 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2036 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2037 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2038 } 2039 2040 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2041 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2042 for (i=0;i<n_vertices;i++) { 2043 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2044 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2045 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2046 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2047 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2048 } 2049 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2050 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2051 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2052 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2053 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2054 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2055 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2056 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2057 } 2058 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2059 /* coarse basis functions */ 2060 for (i=0;i<pcbddc->local_primal_size;i++) { 2061 PetscScalar *y; 2062 2063 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2064 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2065 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2066 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2067 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2068 if (i<n_vertices) { 2069 y[n_B*i+idx_V_B[i]] = 1.0; 2070 } 2071 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2072 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2073 2074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2075 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2076 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2077 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2078 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2079 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2080 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2081 } 2082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2083 } 2084 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2085 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2086 } 2087 /* free memory */ 2088 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2089 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2090 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2091 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2092 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2093 ierr = PetscFree(work);CHKERRQ(ierr); 2094 if (n_vertices) { 2095 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2096 } 2097 if (n_constraints) { 2098 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2099 } 2100 /* Checking coarse_sub_mat and coarse basis functios */ 2101 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2102 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2103 if (pcbddc->dbg_flag) { 2104 Mat coarse_sub_mat; 2105 Mat AUXMAT,TM1,TM2,TM3,TM4; 2106 Mat coarse_phi_D,coarse_phi_B; 2107 Mat coarse_psi_D,coarse_psi_B; 2108 Mat A_II,A_BB,A_IB,A_BI; 2109 Mat C_B,CPHI; 2110 IS is_dummy; 2111 Vec mones; 2112 MatType checkmattype=MATSEQAIJ; 2113 PetscReal real_value; 2114 2115 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2116 Mat A; 2117 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2118 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2119 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2120 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2121 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2122 ierr = MatDestroy(&A);CHKERRQ(ierr); 2123 } else { 2124 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2125 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2126 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2127 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2128 } 2129 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2130 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2131 if (!pcbddc->symmetric_primal) { 2132 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2133 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2134 } 2135 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2136 2137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2138 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2140 if (!pcbddc->symmetric_primal) { 2141 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2142 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2143 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2144 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2145 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2146 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2147 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2148 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2149 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2150 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2151 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2152 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2153 } else { 2154 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2155 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2156 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2157 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2158 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2159 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2160 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2161 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2162 } 2163 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2164 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2165 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2166 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2167 if (pcbddc->benign_n) { 2168 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2169 PetscScalar *data,*data2; 2170 PetscInt j; 2171 2172 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2173 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2174 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2175 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2176 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2177 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2178 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2179 for (j=0;j<pcbddc->benign_n;j++) { 2180 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2181 for (i=0;i<pcbddc->local_primal_size;i++) { 2182 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2183 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2184 } 2185 } 2186 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2187 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2188 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2189 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2190 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2191 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2192 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2193 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2194 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2195 } 2196 #if 0 2197 { 2198 PetscViewer viewer; 2199 char filename[256]; 2200 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2201 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2202 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2203 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2204 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2205 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2206 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2207 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2208 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2209 if (pcbddc->coarse_phi_D) { 2210 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2211 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2212 } 2213 if (pcbddc->coarse_psi_B) { 2214 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2215 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2216 } 2217 if (pcbddc->coarse_phi_B) { 2218 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2219 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2220 } 2221 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2222 } 2223 #endif 2224 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2225 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2226 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2227 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2228 2229 /* check constraints */ 2230 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2231 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2232 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2233 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2234 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2235 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2236 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2237 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2239 if (!pcbddc->symmetric_primal) { 2240 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2241 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2242 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2243 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2245 } 2246 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2247 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2248 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2249 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2250 } 2251 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2252 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2253 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2254 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2255 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2256 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2257 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2258 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2259 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2260 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2261 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2262 if (!pcbddc->symmetric_primal) { 2263 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2264 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2265 } 2266 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2267 } 2268 /* get back data */ 2269 *coarse_submat_vals_n = coarse_submat_vals; 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2275 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2276 { 2277 Mat *work_mat; 2278 IS isrow_s,iscol_s; 2279 PetscBool rsorted,csorted; 2280 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2281 PetscErrorCode ierr; 2282 2283 PetscFunctionBegin; 2284 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2285 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2286 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2287 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2288 2289 if (!rsorted) { 2290 const PetscInt *idxs; 2291 PetscInt *idxs_sorted,i; 2292 2293 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2294 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2295 for (i=0;i<rsize;i++) { 2296 idxs_perm_r[i] = i; 2297 } 2298 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2299 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2300 for (i=0;i<rsize;i++) { 2301 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2302 } 2303 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2304 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2305 } else { 2306 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2307 isrow_s = isrow; 2308 } 2309 2310 if (!csorted) { 2311 if (isrow == iscol) { 2312 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2313 iscol_s = isrow_s; 2314 } else { 2315 const PetscInt *idxs; 2316 PetscInt *idxs_sorted,i; 2317 2318 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2319 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2320 for (i=0;i<csize;i++) { 2321 idxs_perm_c[i] = i; 2322 } 2323 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2324 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2325 for (i=0;i<csize;i++) { 2326 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2327 } 2328 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2329 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2330 } 2331 } else { 2332 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2333 iscol_s = iscol; 2334 } 2335 2336 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2337 2338 if (!rsorted || !csorted) { 2339 Mat new_mat; 2340 IS is_perm_r,is_perm_c; 2341 2342 if (!rsorted) { 2343 PetscInt *idxs_r,i; 2344 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2345 for (i=0;i<rsize;i++) { 2346 idxs_r[idxs_perm_r[i]] = i; 2347 } 2348 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2349 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2350 } else { 2351 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2352 } 2353 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2354 2355 if (!csorted) { 2356 if (isrow_s == iscol_s) { 2357 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2358 is_perm_c = is_perm_r; 2359 } else { 2360 PetscInt *idxs_c,i; 2361 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2362 for (i=0;i<csize;i++) { 2363 idxs_c[idxs_perm_c[i]] = i; 2364 } 2365 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2366 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2367 } 2368 } else { 2369 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2370 } 2371 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2372 2373 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2374 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2375 work_mat[0] = new_mat; 2376 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2377 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2378 } 2379 2380 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2381 *B = work_mat[0]; 2382 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2383 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2384 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2390 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2391 { 2392 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2394 Mat new_mat; 2395 IS is_local,is_global; 2396 PetscInt local_size; 2397 PetscBool isseqaij; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2402 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2403 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2404 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2405 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2406 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2407 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2408 2409 /* check */ 2410 if (pcbddc->dbg_flag) { 2411 Vec x,x_change; 2412 PetscReal error; 2413 2414 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2415 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2416 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2417 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2418 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2419 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2420 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2421 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2422 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2423 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2424 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2426 ierr = VecDestroy(&x);CHKERRQ(ierr); 2427 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2428 } 2429 2430 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2431 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2432 if (isseqaij) { 2433 Mat M; 2434 2435 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2436 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2437 ierr = MatDestroy(&M);CHKERRQ(ierr); 2438 } else { 2439 Mat work_mat,M; 2440 2441 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2442 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2443 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2444 ierr = MatDestroy(&M);CHKERRQ(ierr); 2445 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2446 } 2447 if (matis->A->symmetric_set) { 2448 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2449 #if !defined(PETSC_USE_COMPLEX) 2450 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2451 #endif 2452 } 2453 /* 2454 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2455 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2456 */ 2457 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2463 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2464 { 2465 PC_IS* pcis = (PC_IS*)(pc->data); 2466 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2467 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2468 PetscInt *idx_R_local=NULL; 2469 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2470 PetscInt vbs,bs; 2471 PetscBT bitmask=NULL; 2472 PetscErrorCode ierr; 2473 2474 PetscFunctionBegin; 2475 /* 2476 No need to setup local scatters if 2477 - primal space is unchanged 2478 AND 2479 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2480 AND 2481 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2482 */ 2483 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2484 PetscFunctionReturn(0); 2485 } 2486 /* destroy old objects */ 2487 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2488 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2489 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2490 /* Set Non-overlapping dimensions */ 2491 n_B = pcis->n_B; 2492 n_D = pcis->n - n_B; 2493 n_vertices = pcbddc->n_vertices; 2494 2495 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2496 2497 /* create auxiliary bitmask and allocate workspace */ 2498 if (!sub_schurs->reuse_solver) { 2499 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2500 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2501 for (i=0;i<n_vertices;i++) { 2502 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2503 } 2504 2505 for (i=0, n_R=0; i<pcis->n; i++) { 2506 if (!PetscBTLookup(bitmask,i)) { 2507 idx_R_local[n_R++] = i; 2508 } 2509 } 2510 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2511 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2512 2513 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2514 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2515 } 2516 2517 /* Block code */ 2518 vbs = 1; 2519 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2520 if (bs>1 && !(n_vertices%bs)) { 2521 PetscBool is_blocked = PETSC_TRUE; 2522 PetscInt *vary; 2523 if (!sub_schurs->reuse_solver) { 2524 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2525 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2526 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2527 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2528 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2529 for (i=0; i<pcis->n/bs; i++) { 2530 if (vary[i]!=0 && vary[i]!=bs) { 2531 is_blocked = PETSC_FALSE; 2532 break; 2533 } 2534 } 2535 ierr = PetscFree(vary);CHKERRQ(ierr); 2536 } else { 2537 /* Verify directly the R set */ 2538 for (i=0; i<n_R/bs; i++) { 2539 PetscInt j,node=idx_R_local[bs*i]; 2540 for (j=1; j<bs; j++) { 2541 if (node != idx_R_local[bs*i+j]-j) { 2542 is_blocked = PETSC_FALSE; 2543 break; 2544 } 2545 } 2546 } 2547 } 2548 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2549 vbs = bs; 2550 for (i=0;i<n_R/vbs;i++) { 2551 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2552 } 2553 } 2554 } 2555 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2556 if (sub_schurs->reuse_solver) { 2557 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2558 2559 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2560 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2561 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2562 reuse_solver->is_R = pcbddc->is_R_local; 2563 } else { 2564 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2565 } 2566 2567 /* print some info if requested */ 2568 if (pcbddc->dbg_flag) { 2569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2571 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2576 } 2577 2578 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2579 if (!sub_schurs->reuse_solver) { 2580 IS is_aux1,is_aux2; 2581 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2582 2583 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2584 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2585 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2586 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2587 for (i=0; i<n_D; i++) { 2588 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2589 } 2590 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2591 for (i=0, j=0; i<n_R; i++) { 2592 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2593 aux_array1[j++] = i; 2594 } 2595 } 2596 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2597 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2598 for (i=0, j=0; i<n_B; i++) { 2599 if (!PetscBTLookup(bitmask,is_indices[i])) { 2600 aux_array2[j++] = i; 2601 } 2602 } 2603 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2604 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2605 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2606 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2607 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2608 2609 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2610 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2611 for (i=0, j=0; i<n_R; i++) { 2612 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2613 aux_array1[j++] = i; 2614 } 2615 } 2616 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2617 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2618 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2619 } 2620 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2621 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2622 } else { 2623 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2624 IS tis; 2625 PetscInt schur_size; 2626 2627 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2628 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2629 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2630 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2631 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2632 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2633 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2634 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2635 } 2636 } 2637 PetscFunctionReturn(0); 2638 } 2639 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2643 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2644 { 2645 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2646 PC_IS *pcis = (PC_IS*)pc->data; 2647 PC pc_temp; 2648 Mat A_RR; 2649 MatReuse reuse; 2650 PetscScalar m_one = -1.0; 2651 PetscReal value; 2652 PetscInt n_D,n_R; 2653 PetscBool use_exact,use_exact_reduced,issbaij; 2654 PetscErrorCode ierr; 2655 /* prefixes stuff */ 2656 char dir_prefix[256],neu_prefix[256],str_level[16]; 2657 size_t len; 2658 2659 PetscFunctionBegin; 2660 2661 /* compute prefixes */ 2662 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2663 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2664 if (!pcbddc->current_level) { 2665 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2666 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2667 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2668 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2669 } else { 2670 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2671 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2672 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2673 len -= 15; /* remove "pc_bddc_coarse_" */ 2674 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2675 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2676 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2677 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2678 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2679 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2680 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2681 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2682 } 2683 2684 /* DIRICHLET PROBLEM */ 2685 if (dirichlet) { 2686 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2687 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2688 Mat A_IIn; 2689 2690 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2691 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2692 pcis->A_II = A_IIn; 2693 } 2694 if (pcbddc->local_mat->symmetric_set) { 2695 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2696 } 2697 /* Matrix for Dirichlet problem is pcis->A_II */ 2698 n_D = pcis->n - pcis->n_B; 2699 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2700 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2701 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2702 /* default */ 2703 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2704 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2705 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2706 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2707 if (issbaij) { 2708 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2709 } else { 2710 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2711 } 2712 /* Allow user's customization */ 2713 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2714 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2715 } 2716 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2717 if (sub_schurs->reuse_solver) { 2718 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2719 2720 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2721 } 2722 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2723 if (!n_D) { 2724 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2725 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2726 } 2727 /* Set Up KSP for Dirichlet problem of BDDC */ 2728 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2729 /* set ksp_D into pcis data */ 2730 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2731 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2732 pcis->ksp_D = pcbddc->ksp_D; 2733 } 2734 2735 /* NEUMANN PROBLEM */ 2736 A_RR = 0; 2737 if (neumann) { 2738 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2739 PetscInt ibs,mbs; 2740 PetscBool issbaij; 2741 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2742 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2743 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2744 if (pcbddc->ksp_R) { /* already created ksp */ 2745 PetscInt nn_R; 2746 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2747 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2748 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2749 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2750 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2751 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2752 reuse = MAT_INITIAL_MATRIX; 2753 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2754 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2755 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2756 reuse = MAT_INITIAL_MATRIX; 2757 } else { /* safe to reuse the matrix */ 2758 reuse = MAT_REUSE_MATRIX; 2759 } 2760 } 2761 /* last check */ 2762 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2763 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2764 reuse = MAT_INITIAL_MATRIX; 2765 } 2766 } else { /* first time, so we need to create the matrix */ 2767 reuse = MAT_INITIAL_MATRIX; 2768 } 2769 /* extract A_RR */ 2770 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2771 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2772 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2773 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2774 if (matis->A == pcbddc->local_mat) { 2775 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2776 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2777 } else { 2778 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2779 } 2780 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2781 if (matis->A == pcbddc->local_mat) { 2782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2783 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2784 } else { 2785 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2786 } 2787 } 2788 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2789 if (pcbddc->local_mat->symmetric_set) { 2790 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2791 } 2792 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2793 Mat A_RRn; 2794 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2795 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2796 A_RR = A_RRn; 2797 } 2798 if (!pcbddc->ksp_R) { /* create object if not present */ 2799 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2800 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2801 /* default */ 2802 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2803 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2804 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2805 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2806 if (issbaij) { 2807 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2808 } else { 2809 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2810 } 2811 /* Allow user's customization */ 2812 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2813 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2814 } 2815 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2816 if (!n_R) { 2817 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2818 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2819 } 2820 /* Reuse solver if it is present */ 2821 if (sub_schurs->reuse_solver) { 2822 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2823 2824 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2825 } 2826 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2827 /* Set Up KSP for Neumann problem of BDDC */ 2828 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2829 } 2830 /* free Neumann problem's matrix */ 2831 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2832 2833 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2834 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2837 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2838 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2839 } 2840 if (dirichlet) { /* Dirichlet */ 2841 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2842 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2843 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2844 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2845 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2846 /* need to be adapted? */ 2847 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2848 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2849 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2850 /* print info */ 2851 if (pcbddc->dbg_flag) { 2852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2854 } 2855 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2856 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2857 } 2858 } 2859 if (neumann) { /* Neumann */ 2860 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2861 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2862 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2863 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2864 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2865 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2866 /* need to be adapted? */ 2867 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2868 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2869 /* print info */ 2870 if (pcbddc->dbg_flag) { 2871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2872 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2873 } 2874 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2875 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2876 } 2877 } 2878 } 2879 PetscFunctionReturn(0); 2880 } 2881 2882 #undef __FUNCT__ 2883 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2884 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2885 { 2886 PetscErrorCode ierr; 2887 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2888 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2889 2890 PetscFunctionBegin; 2891 if (!sub_schurs->reuse_solver) { 2892 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2893 } 2894 if (!pcbddc->switch_static) { 2895 if (applytranspose && pcbddc->local_auxmat1) { 2896 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2897 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2898 } 2899 if (!sub_schurs->reuse_solver) { 2900 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2901 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2902 } else { 2903 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2904 2905 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2906 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2907 } 2908 } else { 2909 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2910 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2911 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2912 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2913 if (applytranspose && pcbddc->local_auxmat1) { 2914 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2915 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2916 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2918 } 2919 } 2920 if (!sub_schurs->reuse_solver) { 2921 if (applytranspose) { 2922 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2923 } else { 2924 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2925 } 2926 } else { 2927 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2928 2929 if (applytranspose) { 2930 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2931 } else { 2932 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2933 } 2934 } 2935 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2936 if (!pcbddc->switch_static) { 2937 if (!sub_schurs->reuse_solver) { 2938 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2940 } else { 2941 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2942 2943 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 } 2946 if (!applytranspose && pcbddc->local_auxmat1) { 2947 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2948 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2949 } 2950 } else { 2951 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2952 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2953 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2954 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2955 if (!applytranspose && pcbddc->local_auxmat1) { 2956 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2957 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2958 } 2959 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2960 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2961 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2962 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2963 } 2964 PetscFunctionReturn(0); 2965 } 2966 2967 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2970 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2971 { 2972 PetscErrorCode ierr; 2973 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2974 PC_IS* pcis = (PC_IS*) (pc->data); 2975 const PetscScalar zero = 0.0; 2976 2977 PetscFunctionBegin; 2978 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2979 if (applytranspose) { 2980 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2981 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2982 } else { 2983 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2984 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2985 } 2986 2987 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2988 if (pcbddc->benign_n) { 2989 PetscScalar *array; 2990 PetscInt j; 2991 2992 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2993 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2994 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2995 } 2996 2997 /* start communications from local primal nodes to rhs of coarse solver */ 2998 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2999 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3000 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3001 3002 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3003 /* TODO remove null space when doing multilevel */ 3004 if (pcbddc->coarse_ksp) { 3005 Mat coarse_mat; 3006 Vec rhs,sol; 3007 MatNullSpace nullsp; 3008 3009 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3010 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3011 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3012 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3013 if (nullsp) { 3014 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3015 } 3016 if (applytranspose) { 3017 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3018 } else { 3019 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3020 } 3021 if (nullsp) { 3022 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3023 } 3024 } 3025 3026 /* Local solution on R nodes */ 3027 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 3028 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3029 } 3030 3031 /* communications from coarse sol to local primal nodes */ 3032 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3033 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3034 3035 /* Sum contributions from two levels */ 3036 if (applytranspose) { 3037 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3038 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3039 } else { 3040 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3041 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3042 } 3043 /* store p0 */ 3044 if (pcbddc->benign_n) { 3045 PetscScalar *array; 3046 PetscInt j; 3047 3048 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3049 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3050 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3051 } 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3057 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3058 { 3059 PetscErrorCode ierr; 3060 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3061 PetscScalar *array; 3062 Vec from,to; 3063 3064 PetscFunctionBegin; 3065 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3066 from = pcbddc->coarse_vec; 3067 to = pcbddc->vec1_P; 3068 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3069 Vec tvec; 3070 3071 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3072 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3073 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3074 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3075 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3076 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3077 } 3078 } else { /* from local to global -> put data in coarse right hand side */ 3079 from = pcbddc->vec1_P; 3080 to = pcbddc->coarse_vec; 3081 } 3082 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3088 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3089 { 3090 PetscErrorCode ierr; 3091 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3092 PetscScalar *array; 3093 Vec from,to; 3094 3095 PetscFunctionBegin; 3096 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3097 from = pcbddc->coarse_vec; 3098 to = pcbddc->vec1_P; 3099 } else { /* from local to global -> put data in coarse right hand side */ 3100 from = pcbddc->vec1_P; 3101 to = pcbddc->coarse_vec; 3102 } 3103 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3104 if (smode == SCATTER_FORWARD) { 3105 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3106 Vec tvec; 3107 3108 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3109 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3110 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3111 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3112 } 3113 } else { 3114 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3115 ierr = VecResetArray(from);CHKERRQ(ierr); 3116 } 3117 } 3118 PetscFunctionReturn(0); 3119 } 3120 3121 /* uncomment for testing purposes */ 3122 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3123 #undef __FUNCT__ 3124 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3125 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3126 { 3127 PetscErrorCode ierr; 3128 PC_IS* pcis = (PC_IS*)(pc->data); 3129 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3130 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3131 /* one and zero */ 3132 PetscScalar one=1.0,zero=0.0; 3133 /* space to store constraints and their local indices */ 3134 PetscScalar *constraints_data; 3135 PetscInt *constraints_idxs,*constraints_idxs_B; 3136 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3137 PetscInt *constraints_n; 3138 /* iterators */ 3139 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3140 /* BLAS integers */ 3141 PetscBLASInt lwork,lierr; 3142 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3143 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3144 /* reuse */ 3145 PetscInt olocal_primal_size,olocal_primal_size_cc; 3146 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3147 /* change of basis */ 3148 PetscBool qr_needed; 3149 PetscBT change_basis,qr_needed_idx; 3150 /* auxiliary stuff */ 3151 PetscInt *nnz,*is_indices; 3152 PetscInt ncc; 3153 /* some quantities */ 3154 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3155 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3156 3157 PetscFunctionBegin; 3158 /* Destroy Mat objects computed previously */ 3159 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3160 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3161 /* save info on constraints from previous setup (if any) */ 3162 olocal_primal_size = pcbddc->local_primal_size; 3163 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3164 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3165 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3166 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3167 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3168 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3169 3170 /* print some info */ 3171 if (pcbddc->dbg_flag) { 3172 IS vertices; 3173 PetscInt nv,nedges,nfaces; 3174 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3175 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3176 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3177 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3183 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3184 } 3185 3186 if (!pcbddc->adaptive_selection) { 3187 IS ISForVertices,*ISForFaces,*ISForEdges; 3188 MatNullSpace nearnullsp; 3189 const Vec *nearnullvecs; 3190 Vec *localnearnullsp; 3191 PetscScalar *array; 3192 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3193 PetscBool nnsp_has_cnst; 3194 /* LAPACK working arrays for SVD or POD */ 3195 PetscBool skip_lapack,boolforchange; 3196 PetscScalar *work; 3197 PetscReal *singular_vals; 3198 #if defined(PETSC_USE_COMPLEX) 3199 PetscReal *rwork; 3200 #endif 3201 #if defined(PETSC_MISSING_LAPACK_GESVD) 3202 PetscScalar *temp_basis,*correlation_mat; 3203 #else 3204 PetscBLASInt dummy_int=1; 3205 PetscScalar dummy_scalar=1.; 3206 #endif 3207 3208 /* Get index sets for faces, edges and vertices from graph */ 3209 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3210 /* free unneeded index sets */ 3211 if (!pcbddc->use_vertices) { 3212 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3213 } 3214 if (!pcbddc->use_edges) { 3215 for (i=0;i<n_ISForEdges;i++) { 3216 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3217 } 3218 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3219 n_ISForEdges = 0; 3220 } 3221 if (!pcbddc->use_faces) { 3222 for (i=0;i<n_ISForFaces;i++) { 3223 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3224 } 3225 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3226 n_ISForFaces = 0; 3227 } 3228 3229 #if defined(PETSC_USE_DEBUG) 3230 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3231 Also use_change_of_basis should be consistent among processors */ 3232 if (pcbddc->NullSpace) { 3233 PetscBool tbool[2],gbool[2]; 3234 3235 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3236 pcbddc->use_change_of_basis = PETSC_TRUE; 3237 if (!ISForEdges) { 3238 pcbddc->use_change_on_faces = PETSC_TRUE; 3239 } 3240 } 3241 tbool[0] = pcbddc->use_change_of_basis; 3242 tbool[1] = pcbddc->use_change_on_faces; 3243 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3244 pcbddc->use_change_of_basis = gbool[0]; 3245 pcbddc->use_change_on_faces = gbool[1]; 3246 } 3247 #endif 3248 3249 /* check if near null space is attached to global mat */ 3250 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3251 if (nearnullsp) { 3252 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3253 /* remove any stored info */ 3254 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3255 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3256 /* store information for BDDC solver reuse */ 3257 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3258 pcbddc->onearnullspace = nearnullsp; 3259 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3260 for (i=0;i<nnsp_size;i++) { 3261 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3262 } 3263 } else { /* if near null space is not provided BDDC uses constants by default */ 3264 nnsp_size = 0; 3265 nnsp_has_cnst = PETSC_TRUE; 3266 } 3267 /* get max number of constraints on a single cc */ 3268 max_constraints = nnsp_size; 3269 if (nnsp_has_cnst) max_constraints++; 3270 3271 /* 3272 Evaluate maximum storage size needed by the procedure 3273 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3274 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3275 There can be multiple constraints per connected component 3276 */ 3277 n_vertices = 0; 3278 if (ISForVertices) { 3279 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3280 } 3281 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3282 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3283 3284 total_counts = n_ISForFaces+n_ISForEdges; 3285 total_counts *= max_constraints; 3286 total_counts += n_vertices; 3287 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3288 3289 total_counts = 0; 3290 max_size_of_constraint = 0; 3291 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3292 IS used_is; 3293 if (i<n_ISForEdges) { 3294 used_is = ISForEdges[i]; 3295 } else { 3296 used_is = ISForFaces[i-n_ISForEdges]; 3297 } 3298 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3299 total_counts += j; 3300 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3301 } 3302 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3303 3304 /* get local part of global near null space vectors */ 3305 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3306 for (k=0;k<nnsp_size;k++) { 3307 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3308 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3309 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3310 } 3311 3312 /* whether or not to skip lapack calls */ 3313 skip_lapack = PETSC_TRUE; 3314 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3315 3316 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3317 if (!skip_lapack) { 3318 PetscScalar temp_work; 3319 3320 #if defined(PETSC_MISSING_LAPACK_GESVD) 3321 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3322 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3323 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3324 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3325 #if defined(PETSC_USE_COMPLEX) 3326 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3327 #endif 3328 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3329 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3330 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3331 lwork = -1; 3332 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3333 #if !defined(PETSC_USE_COMPLEX) 3334 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3335 #else 3336 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3337 #endif 3338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3339 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3340 #else /* on missing GESVD */ 3341 /* SVD */ 3342 PetscInt max_n,min_n; 3343 max_n = max_size_of_constraint; 3344 min_n = max_constraints; 3345 if (max_size_of_constraint < max_constraints) { 3346 min_n = max_size_of_constraint; 3347 max_n = max_constraints; 3348 } 3349 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3350 #if defined(PETSC_USE_COMPLEX) 3351 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3352 #endif 3353 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3354 lwork = -1; 3355 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3356 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3357 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3358 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3359 #if !defined(PETSC_USE_COMPLEX) 3360 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3361 #else 3362 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3363 #endif 3364 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3365 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3366 #endif /* on missing GESVD */ 3367 /* Allocate optimal workspace */ 3368 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3369 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3370 } 3371 /* Now we can loop on constraining sets */ 3372 total_counts = 0; 3373 constraints_idxs_ptr[0] = 0; 3374 constraints_data_ptr[0] = 0; 3375 /* vertices */ 3376 if (n_vertices) { 3377 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3378 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3379 for (i=0;i<n_vertices;i++) { 3380 constraints_n[total_counts] = 1; 3381 constraints_data[total_counts] = 1.0; 3382 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3383 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3384 total_counts++; 3385 } 3386 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3387 n_vertices = total_counts; 3388 } 3389 3390 /* edges and faces */ 3391 total_counts_cc = total_counts; 3392 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3393 IS used_is; 3394 PetscBool idxs_copied = PETSC_FALSE; 3395 3396 if (ncc<n_ISForEdges) { 3397 used_is = ISForEdges[ncc]; 3398 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3399 } else { 3400 used_is = ISForFaces[ncc-n_ISForEdges]; 3401 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3402 } 3403 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3404 3405 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3406 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3407 /* change of basis should not be performed on local periodic nodes */ 3408 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3409 if (nnsp_has_cnst) { 3410 PetscScalar quad_value; 3411 3412 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3413 idxs_copied = PETSC_TRUE; 3414 3415 if (!pcbddc->use_nnsp_true) { 3416 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3417 } else { 3418 quad_value = 1.0; 3419 } 3420 for (j=0;j<size_of_constraint;j++) { 3421 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3422 } 3423 temp_constraints++; 3424 total_counts++; 3425 } 3426 for (k=0;k<nnsp_size;k++) { 3427 PetscReal real_value; 3428 PetscScalar *ptr_to_data; 3429 3430 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3431 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3432 for (j=0;j<size_of_constraint;j++) { 3433 ptr_to_data[j] = array[is_indices[j]]; 3434 } 3435 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3436 /* check if array is null on the connected component */ 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3438 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3439 if (real_value > 0.0) { /* keep indices and values */ 3440 temp_constraints++; 3441 total_counts++; 3442 if (!idxs_copied) { 3443 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3444 idxs_copied = PETSC_TRUE; 3445 } 3446 } 3447 } 3448 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3449 valid_constraints = temp_constraints; 3450 if (!pcbddc->use_nnsp_true && temp_constraints) { 3451 if (temp_constraints == 1) { /* just normalize the constraint */ 3452 PetscScalar norm,*ptr_to_data; 3453 3454 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3455 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3456 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3457 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3458 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3459 } else { /* perform SVD */ 3460 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3461 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3462 3463 #if defined(PETSC_MISSING_LAPACK_GESVD) 3464 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3465 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3466 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3467 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3468 from that computed using LAPACKgesvd 3469 -> This is due to a different computation of eigenvectors in LAPACKheev 3470 -> The quality of the POD-computed basis will be the same */ 3471 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3472 /* Store upper triangular part of correlation matrix */ 3473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3475 for (j=0;j<temp_constraints;j++) { 3476 for (k=0;k<j+1;k++) { 3477 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3478 } 3479 } 3480 /* compute eigenvalues and eigenvectors of correlation matrix */ 3481 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3482 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3483 #if !defined(PETSC_USE_COMPLEX) 3484 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3485 #else 3486 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3487 #endif 3488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3489 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3490 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3491 j = 0; 3492 while (j < temp_constraints && singular_vals[j] < tol) j++; 3493 total_counts = total_counts-j; 3494 valid_constraints = temp_constraints-j; 3495 /* scale and copy POD basis into used quadrature memory */ 3496 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3497 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3500 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3502 if (j<temp_constraints) { 3503 PetscInt ii; 3504 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3505 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3506 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3507 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3508 for (k=0;k<temp_constraints-j;k++) { 3509 for (ii=0;ii<size_of_constraint;ii++) { 3510 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3511 } 3512 } 3513 } 3514 #else /* on missing GESVD */ 3515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3516 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3518 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3519 #if !defined(PETSC_USE_COMPLEX) 3520 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3521 #else 3522 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3523 #endif 3524 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3525 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3526 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3527 k = temp_constraints; 3528 if (k > size_of_constraint) k = size_of_constraint; 3529 j = 0; 3530 while (j < k && singular_vals[k-j-1] < tol) j++; 3531 valid_constraints = k-j; 3532 total_counts = total_counts-temp_constraints+valid_constraints; 3533 #endif /* on missing GESVD */ 3534 } 3535 } 3536 /* update pointers information */ 3537 if (valid_constraints) { 3538 constraints_n[total_counts_cc] = valid_constraints; 3539 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3540 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3541 /* set change_of_basis flag */ 3542 if (boolforchange) { 3543 PetscBTSet(change_basis,total_counts_cc); 3544 } 3545 total_counts_cc++; 3546 } 3547 } 3548 /* free workspace */ 3549 if (!skip_lapack) { 3550 ierr = PetscFree(work);CHKERRQ(ierr); 3551 #if defined(PETSC_USE_COMPLEX) 3552 ierr = PetscFree(rwork);CHKERRQ(ierr); 3553 #endif 3554 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3555 #if defined(PETSC_MISSING_LAPACK_GESVD) 3556 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3557 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3558 #endif 3559 } 3560 for (k=0;k<nnsp_size;k++) { 3561 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3562 } 3563 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3564 /* free index sets of faces, edges and vertices */ 3565 for (i=0;i<n_ISForFaces;i++) { 3566 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3567 } 3568 if (n_ISForFaces) { 3569 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3570 } 3571 for (i=0;i<n_ISForEdges;i++) { 3572 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3573 } 3574 if (n_ISForEdges) { 3575 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3576 } 3577 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3578 } else { 3579 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3580 3581 total_counts = 0; 3582 n_vertices = 0; 3583 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3584 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3585 } 3586 max_constraints = 0; 3587 total_counts_cc = 0; 3588 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3589 total_counts += pcbddc->adaptive_constraints_n[i]; 3590 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3591 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3592 } 3593 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3594 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3595 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3596 constraints_data = pcbddc->adaptive_constraints_data; 3597 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3598 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3599 total_counts_cc = 0; 3600 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3601 if (pcbddc->adaptive_constraints_n[i]) { 3602 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3603 } 3604 } 3605 #if 0 3606 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3607 for (i=0;i<total_counts_cc;i++) { 3608 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3609 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3610 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3611 printf(" %d",constraints_idxs[j]); 3612 } 3613 printf("\n"); 3614 printf("number of cc: %d\n",constraints_n[i]); 3615 } 3616 for (i=0;i<n_vertices;i++) { 3617 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3618 } 3619 for (i=0;i<sub_schurs->n_subs;i++) { 3620 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3621 } 3622 #endif 3623 3624 max_size_of_constraint = 0; 3625 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3626 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3627 /* Change of basis */ 3628 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3629 if (pcbddc->use_change_of_basis) { 3630 for (i=0;i<sub_schurs->n_subs;i++) { 3631 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3632 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3633 } 3634 } 3635 } 3636 } 3637 pcbddc->local_primal_size = total_counts; 3638 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3639 3640 /* map constraints_idxs in boundary numbering */ 3641 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3642 if (i != constraints_idxs_ptr[total_counts_cc]) { 3643 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3644 } 3645 3646 /* Create constraint matrix */ 3647 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3648 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3649 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3650 3651 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3652 /* determine if a QR strategy is needed for change of basis */ 3653 qr_needed = PETSC_FALSE; 3654 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3655 total_primal_vertices=0; 3656 pcbddc->local_primal_size_cc = 0; 3657 for (i=0;i<total_counts_cc;i++) { 3658 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3659 if (size_of_constraint == 1) { 3660 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3661 pcbddc->local_primal_size_cc += 1; 3662 } else if (PetscBTLookup(change_basis,i)) { 3663 for (k=0;k<constraints_n[i];k++) { 3664 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3665 } 3666 pcbddc->local_primal_size_cc += constraints_n[i]; 3667 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3668 PetscBTSet(qr_needed_idx,i); 3669 qr_needed = PETSC_TRUE; 3670 } 3671 } else { 3672 pcbddc->local_primal_size_cc += 1; 3673 } 3674 } 3675 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3676 pcbddc->n_vertices = total_primal_vertices; 3677 /* permute indices in order to have a sorted set of vertices */ 3678 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3679 3680 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3681 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3682 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3683 3684 /* nonzero structure of constraint matrix */ 3685 /* and get reference dof for local constraints */ 3686 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3687 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3688 3689 j = total_primal_vertices; 3690 total_counts = total_primal_vertices; 3691 cum = total_primal_vertices; 3692 for (i=n_vertices;i<total_counts_cc;i++) { 3693 if (!PetscBTLookup(change_basis,i)) { 3694 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3695 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3696 cum++; 3697 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3698 for (k=0;k<constraints_n[i];k++) { 3699 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3700 nnz[j+k] = size_of_constraint; 3701 } 3702 j += constraints_n[i]; 3703 } 3704 } 3705 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3706 ierr = PetscFree(nnz);CHKERRQ(ierr); 3707 3708 /* set values in constraint matrix */ 3709 for (i=0;i<total_primal_vertices;i++) { 3710 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3711 } 3712 total_counts = total_primal_vertices; 3713 for (i=n_vertices;i<total_counts_cc;i++) { 3714 if (!PetscBTLookup(change_basis,i)) { 3715 PetscInt *cols; 3716 3717 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3718 cols = constraints_idxs+constraints_idxs_ptr[i]; 3719 for (k=0;k<constraints_n[i];k++) { 3720 PetscInt row = total_counts+k; 3721 PetscScalar *vals; 3722 3723 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3724 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3725 } 3726 total_counts += constraints_n[i]; 3727 } 3728 } 3729 /* assembling */ 3730 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3731 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3732 3733 /* 3734 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3735 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3736 */ 3737 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3738 if (pcbddc->use_change_of_basis) { 3739 /* dual and primal dofs on a single cc */ 3740 PetscInt dual_dofs,primal_dofs; 3741 /* working stuff for GEQRF */ 3742 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3743 PetscBLASInt lqr_work; 3744 /* working stuff for UNGQR */ 3745 PetscScalar *gqr_work,lgqr_work_t; 3746 PetscBLASInt lgqr_work; 3747 /* working stuff for TRTRS */ 3748 PetscScalar *trs_rhs; 3749 PetscBLASInt Blas_NRHS; 3750 /* pointers for values insertion into change of basis matrix */ 3751 PetscInt *start_rows,*start_cols; 3752 PetscScalar *start_vals; 3753 /* working stuff for values insertion */ 3754 PetscBT is_primal; 3755 PetscInt *aux_primal_numbering_B; 3756 /* matrix sizes */ 3757 PetscInt global_size,local_size; 3758 /* temporary change of basis */ 3759 Mat localChangeOfBasisMatrix; 3760 /* extra space for debugging */ 3761 PetscScalar *dbg_work; 3762 3763 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3764 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3765 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3766 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3767 /* nonzeros for local mat */ 3768 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3769 for (i=0;i<pcis->n;i++) nnz[i]=1; 3770 for (i=n_vertices;i<total_counts_cc;i++) { 3771 if (PetscBTLookup(change_basis,i)) { 3772 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3773 if (PetscBTLookup(qr_needed_idx,i)) { 3774 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3775 } else { 3776 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3777 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3778 } 3779 } 3780 } 3781 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3782 ierr = PetscFree(nnz);CHKERRQ(ierr); 3783 /* Set initial identity in the matrix */ 3784 for (i=0;i<pcis->n;i++) { 3785 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3786 } 3787 3788 if (pcbddc->dbg_flag) { 3789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3791 } 3792 3793 3794 /* Now we loop on the constraints which need a change of basis */ 3795 /* 3796 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3797 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3798 3799 Basic blocks of change of basis matrix T computed by 3800 3801 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3802 3803 | 1 0 ... 0 s_1/S | 3804 | 0 1 ... 0 s_2/S | 3805 | ... | 3806 | 0 ... 1 s_{n-1}/S | 3807 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3808 3809 with S = \sum_{i=1}^n s_i^2 3810 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3811 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3812 3813 - QR decomposition of constraints otherwise 3814 */ 3815 if (qr_needed) { 3816 /* space to store Q */ 3817 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3818 /* first we issue queries for optimal work */ 3819 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3820 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3821 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3822 lqr_work = -1; 3823 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3824 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3825 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3826 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3827 lgqr_work = -1; 3828 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3829 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3830 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3831 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3832 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3833 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3834 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3835 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3836 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3837 /* array to store scaling factors for reflectors */ 3838 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3839 /* array to store rhs and solution of triangular solver */ 3840 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3841 /* allocating workspace for check */ 3842 if (pcbddc->dbg_flag) { 3843 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3844 } 3845 } 3846 /* array to store whether a node is primal or not */ 3847 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3848 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3849 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3850 if (i != total_primal_vertices) { 3851 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3852 } 3853 for (i=0;i<total_primal_vertices;i++) { 3854 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3855 } 3856 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3857 3858 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3859 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3860 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3861 if (PetscBTLookup(change_basis,total_counts)) { 3862 /* get constraint info */ 3863 primal_dofs = constraints_n[total_counts]; 3864 dual_dofs = size_of_constraint-primal_dofs; 3865 3866 if (pcbddc->dbg_flag) { 3867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3868 } 3869 3870 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3871 3872 /* copy quadrature constraints for change of basis check */ 3873 if (pcbddc->dbg_flag) { 3874 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3875 } 3876 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3877 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3878 3879 /* compute QR decomposition of constraints */ 3880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3881 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3882 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3883 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3884 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3885 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3886 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3887 3888 /* explictly compute R^-T */ 3889 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3890 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3891 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3892 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3893 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3894 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3896 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3898 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3899 3900 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3903 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3904 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3905 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3906 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3908 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3909 3910 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3911 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3912 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3914 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3915 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3916 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3917 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3920 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3921 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3922 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3923 3924 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3925 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3926 /* insert cols for primal dofs */ 3927 for (j=0;j<primal_dofs;j++) { 3928 start_vals = &qr_basis[j*size_of_constraint]; 3929 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3930 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3931 } 3932 /* insert cols for dual dofs */ 3933 for (j=0,k=0;j<dual_dofs;k++) { 3934 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3935 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3936 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3937 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3938 j++; 3939 } 3940 } 3941 3942 /* check change of basis */ 3943 if (pcbddc->dbg_flag) { 3944 PetscInt ii,jj; 3945 PetscBool valid_qr=PETSC_TRUE; 3946 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3948 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3949 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3950 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3951 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3952 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3953 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3954 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3955 for (jj=0;jj<size_of_constraint;jj++) { 3956 for (ii=0;ii<primal_dofs;ii++) { 3957 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3958 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3959 } 3960 } 3961 if (!valid_qr) { 3962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3963 for (jj=0;jj<size_of_constraint;jj++) { 3964 for (ii=0;ii<primal_dofs;ii++) { 3965 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3966 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3967 } 3968 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3969 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3970 } 3971 } 3972 } 3973 } else { 3974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3975 } 3976 } 3977 } else { /* simple transformation block */ 3978 PetscInt row,col; 3979 PetscScalar val,norm; 3980 3981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3982 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3983 for (j=0;j<size_of_constraint;j++) { 3984 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3985 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3986 if (!PetscBTLookup(is_primal,row_B)) { 3987 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3988 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3989 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3990 } else { 3991 for (k=0;k<size_of_constraint;k++) { 3992 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3993 if (row != col) { 3994 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3995 } else { 3996 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3997 } 3998 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3999 } 4000 } 4001 } 4002 if (pcbddc->dbg_flag) { 4003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4004 } 4005 } 4006 } else { 4007 if (pcbddc->dbg_flag) { 4008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4009 } 4010 } 4011 } 4012 4013 /* free workspace */ 4014 if (qr_needed) { 4015 if (pcbddc->dbg_flag) { 4016 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4017 } 4018 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4019 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4020 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4021 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4022 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4023 } 4024 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4025 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4026 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4027 4028 /* assembling of global change of variable */ 4029 { 4030 Mat tmat; 4031 PetscInt bs; 4032 4033 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4034 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4035 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4036 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4037 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4038 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4039 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4040 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4041 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4042 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4043 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4044 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4045 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4046 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4047 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4048 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4049 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4050 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4051 } 4052 /* check */ 4053 if (pcbddc->dbg_flag) { 4054 PetscReal error; 4055 Vec x,x_change; 4056 4057 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4058 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4059 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4060 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4061 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4062 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4063 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4064 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4065 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4066 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4067 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4068 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4071 ierr = VecDestroy(&x);CHKERRQ(ierr); 4072 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4073 } 4074 4075 /* adapt sub_schurs computed (if any) */ 4076 if (pcbddc->use_deluxe_scaling) { 4077 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4078 if (sub_schurs->S_Ej_all) { 4079 Mat S_new,tmat; 4080 ISLocalToGlobalMapping NtoSall; 4081 IS is_all_N,is_V,is_V_Sall; 4082 const PetscScalar *array; 4083 const PetscInt *idxs_V,*idxs_all; 4084 PetscInt i,n_V; 4085 4086 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4087 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4088 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4089 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4090 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4091 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4092 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4093 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4094 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4095 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4096 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4097 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4098 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4099 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4100 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4101 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4102 for (i=0;i<n_V;i++) { 4103 PetscScalar val; 4104 PetscInt idx; 4105 4106 idx = idxs_V[i]; 4107 val = array[idxs_all[idxs_V[i]]]; 4108 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4109 } 4110 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4111 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4112 sub_schurs->S_Ej_all = S_new; 4113 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4114 if (sub_schurs->sum_S_Ej_all) { 4115 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4116 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4117 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4118 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4119 sub_schurs->sum_S_Ej_all = S_new; 4120 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4121 } 4122 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4123 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4124 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4125 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4126 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4127 } 4128 } 4129 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4130 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4131 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4132 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4133 } 4134 4135 /* set up change of basis context */ 4136 if (pcbddc->ChangeOfBasisMatrix) { 4137 PCBDDCChange_ctx change_ctx; 4138 4139 if (!pcbddc->new_global_mat) { 4140 PetscInt global_size,local_size; 4141 4142 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4143 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4144 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4145 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4146 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4147 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4148 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4149 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4150 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4151 } else { 4152 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4153 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4154 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4155 } 4156 if (!pcbddc->user_ChangeOfBasisMatrix) { 4157 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4158 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4159 } else { 4160 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4161 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4162 } 4163 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4164 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4165 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4166 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4167 } else { 4168 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4169 } 4170 4171 /* add pressure dofs to set of primal nodes for numbering purposes */ 4172 for (i=0;i<pcbddc->benign_n;i++) { 4173 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4174 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4175 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4176 pcbddc->local_primal_size_cc++; 4177 pcbddc->local_primal_size++; 4178 } 4179 4180 /* check if a new primal space has been introduced (also take into account benign trick) */ 4181 pcbddc->new_primal_space_local = PETSC_TRUE; 4182 if (olocal_primal_size == pcbddc->local_primal_size) { 4183 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4184 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4185 if (!pcbddc->new_primal_space_local) { 4186 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4187 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4188 } 4189 } 4190 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4191 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4192 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4193 4194 /* flush dbg viewer */ 4195 if (pcbddc->dbg_flag) { 4196 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4197 } 4198 4199 /* free workspace */ 4200 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4201 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4202 if (!pcbddc->adaptive_selection) { 4203 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4204 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4205 } else { 4206 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4207 pcbddc->adaptive_constraints_idxs_ptr, 4208 pcbddc->adaptive_constraints_data_ptr, 4209 pcbddc->adaptive_constraints_idxs, 4210 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4211 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4212 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4213 } 4214 PetscFunctionReturn(0); 4215 } 4216 4217 #undef __FUNCT__ 4218 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4219 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4220 { 4221 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4222 PC_IS *pcis = (PC_IS*)pc->data; 4223 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4224 PetscInt ierr,i,vertex_size,N; 4225 PetscViewer viewer=pcbddc->dbg_viewer; 4226 4227 PetscFunctionBegin; 4228 /* Reset previously computed graph */ 4229 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4230 /* Init local Graph struct */ 4231 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4232 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4233 4234 /* Check validity of the csr graph passed in by the user */ 4235 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4236 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4237 } 4238 4239 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4240 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4241 PetscInt *xadj,*adjncy; 4242 PetscInt nvtxs; 4243 PetscBool flg_row=PETSC_FALSE; 4244 4245 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4246 if (flg_row) { 4247 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4248 pcbddc->computed_rowadj = PETSC_TRUE; 4249 } 4250 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4251 } 4252 if (pcbddc->dbg_flag) { 4253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4254 } 4255 4256 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4257 vertex_size = 1; 4258 if (pcbddc->user_provided_isfordofs) { 4259 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4260 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4261 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4262 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4263 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4264 } 4265 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4266 pcbddc->n_ISForDofs = 0; 4267 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4268 } 4269 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4270 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4271 } else { 4272 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4273 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4274 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4275 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4276 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4277 } 4278 } 4279 } 4280 4281 /* Setup of Graph */ 4282 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4283 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4284 } 4285 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4286 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4287 } 4288 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4289 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4290 } 4291 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4292 4293 /* attach info on disconnected subdomains if present */ 4294 if (pcbddc->n_local_subs) { 4295 PetscInt *local_subs; 4296 4297 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4298 for (i=0;i<pcbddc->n_local_subs;i++) { 4299 const PetscInt *idxs; 4300 PetscInt nl,j; 4301 4302 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4303 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4304 for (j=0;j<nl;j++) { 4305 local_subs[idxs[j]] = i; 4306 } 4307 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4308 } 4309 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4310 pcbddc->mat_graph->local_subs = local_subs; 4311 } 4312 4313 /* Graph's connected components analysis */ 4314 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4315 4316 /* print some info to stdout */ 4317 if (pcbddc->dbg_flag) { 4318 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4319 } 4320 4321 /* mark topography has done */ 4322 pcbddc->recompute_topography = PETSC_FALSE; 4323 PetscFunctionReturn(0); 4324 } 4325 4326 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4327 #undef __FUNCT__ 4328 #define __FUNCT__ "PCBDDCSubsetNumbering" 4329 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4330 { 4331 PetscSF sf; 4332 PetscLayout map; 4333 const PetscInt *idxs; 4334 PetscInt *leaf_data,*root_data,*gidxs; 4335 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4336 PetscInt n_n,nlocals,start,first_index; 4337 PetscMPIInt commsize; 4338 PetscBool first_found; 4339 PetscErrorCode ierr; 4340 4341 PetscFunctionBegin; 4342 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4343 if (subset_mult) { 4344 PetscCheckSameComm(subset,1,subset_mult,2); 4345 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4346 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4347 } 4348 /* create workspace layout for computing global indices of subset */ 4349 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4350 lbounds[0] = lbounds[1] = 0; 4351 for (i=0;i<n;i++) { 4352 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4353 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4354 } 4355 lbounds[0] = -lbounds[0]; 4356 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4357 gbounds[0] = -gbounds[0]; 4358 N = gbounds[1] - gbounds[0] + 1; 4359 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4360 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4361 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4362 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4363 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4364 4365 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4366 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4367 if (subset_mult) { 4368 const PetscInt* idxs_mult; 4369 4370 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4371 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4372 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4373 } else { 4374 for (i=0;i<n;i++) leaf_data[i] = 1; 4375 } 4376 /* local size of new subset */ 4377 n_n = 0; 4378 for (i=0;i<n;i++) n_n += leaf_data[i]; 4379 4380 /* global indexes in layout */ 4381 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4382 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4383 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4384 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4385 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4386 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4387 4388 /* reduce from leaves to roots */ 4389 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4390 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4391 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4392 4393 /* count indexes in local part of layout */ 4394 nlocals = 0; 4395 first_index = -1; 4396 first_found = PETSC_FALSE; 4397 for (i=0;i<Nl;i++) { 4398 if (!first_found && root_data[i]) { 4399 first_found = PETSC_TRUE; 4400 first_index = i; 4401 } 4402 nlocals += root_data[i]; 4403 } 4404 4405 /* cumulative of number of indexes and size of subset without holes */ 4406 #if defined(PETSC_HAVE_MPI_EXSCAN) 4407 start = 0; 4408 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4409 #else 4410 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4411 start = start-nlocals; 4412 #endif 4413 4414 if (N_n) { /* compute total size of new subset if requested */ 4415 *N_n = start + nlocals; 4416 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4417 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4418 } 4419 4420 /* adapt root data with cumulative */ 4421 if (first_found) { 4422 PetscInt old_index; 4423 4424 root_data[first_index] += start; 4425 old_index = first_index; 4426 for (i=first_index+1;i<Nl;i++) { 4427 if (root_data[i]) { 4428 root_data[i] += root_data[old_index]; 4429 old_index = i; 4430 } 4431 } 4432 } 4433 4434 /* from roots to leaves */ 4435 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4436 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4437 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4438 4439 /* create new IS with global indexes without holes */ 4440 if (subset_mult) { 4441 const PetscInt* idxs_mult; 4442 PetscInt cum; 4443 4444 cum = 0; 4445 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4446 for (i=0;i<n;i++) { 4447 PetscInt j; 4448 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4449 } 4450 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4451 } else { 4452 for (i=0;i<n;i++) { 4453 gidxs[i] = leaf_data[i]-1; 4454 } 4455 } 4456 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4457 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4458 PetscFunctionReturn(0); 4459 } 4460 4461 #undef __FUNCT__ 4462 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4463 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4464 { 4465 PetscInt i,j; 4466 PetscScalar *alphas; 4467 PetscErrorCode ierr; 4468 4469 PetscFunctionBegin; 4470 /* this implements stabilized Gram-Schmidt */ 4471 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4472 for (i=0;i<n;i++) { 4473 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4474 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4475 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4476 } 4477 ierr = PetscFree(alphas);CHKERRQ(ierr); 4478 PetscFunctionReturn(0); 4479 } 4480 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "MatISGetSubassemblingPattern" 4483 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4484 { 4485 IS ranks_send_to; 4486 PetscInt n_neighs,*neighs,*n_shared,**shared; 4487 PetscMPIInt size,rank,color; 4488 PetscInt *xadj,*adjncy; 4489 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4490 PetscInt i,local_size,threshold=0; 4491 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4492 PetscSubcomm subcomm; 4493 PetscErrorCode ierr; 4494 4495 PetscFunctionBegin; 4496 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4497 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4498 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4499 4500 /* Get info on mapping */ 4501 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4502 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4503 4504 /* build local CSR graph of subdomains' connectivity */ 4505 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4506 xadj[0] = 0; 4507 xadj[1] = PetscMax(n_neighs-1,0); 4508 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4509 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4510 4511 if (threshold) { 4512 PetscInt xadj_count = 0; 4513 for (i=1;i<n_neighs;i++) { 4514 if (n_shared[i] > threshold) { 4515 adjncy[xadj_count] = neighs[i]; 4516 adjncy_wgt[xadj_count] = n_shared[i]; 4517 xadj_count++; 4518 } 4519 } 4520 xadj[1] = xadj_count; 4521 } else { 4522 if (xadj[1]) { 4523 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4524 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4525 } 4526 } 4527 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4528 if (use_square) { 4529 for (i=0;i<xadj[1];i++) { 4530 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4531 } 4532 } 4533 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4534 4535 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4536 4537 /* 4538 Restrict work on active processes only. 4539 */ 4540 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4541 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4542 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4543 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4544 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4545 if (color) { 4546 ierr = PetscFree(xadj);CHKERRQ(ierr); 4547 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4548 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4549 } else { 4550 Mat subdomain_adj; 4551 IS new_ranks,new_ranks_contig; 4552 MatPartitioning partitioner; 4553 PetscInt prank,rstart=0,rend=0; 4554 PetscInt *is_indices,*oldranks; 4555 PetscBool aggregate; 4556 4557 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4558 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4559 prank = rank; 4560 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4561 /* 4562 for (i=0;i<size;i++) { 4563 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4564 } 4565 */ 4566 for (i=0;i<xadj[1];i++) { 4567 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4568 } 4569 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4570 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4571 if (aggregate) { 4572 PetscInt lrows,row,ncols,*cols; 4573 PetscMPIInt nrank; 4574 PetscScalar *vals; 4575 4576 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4577 lrows = 0; 4578 if (nrank<redprocs) { 4579 lrows = size/redprocs; 4580 if (nrank<size%redprocs) lrows++; 4581 } 4582 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4583 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4584 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4585 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4586 row = nrank; 4587 ncols = xadj[1]-xadj[0]; 4588 cols = adjncy; 4589 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4590 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4591 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4592 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4594 ierr = PetscFree(xadj);CHKERRQ(ierr); 4595 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4596 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4597 ierr = PetscFree(vals);CHKERRQ(ierr); 4598 } else { 4599 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4600 } 4601 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4602 4603 /* Partition */ 4604 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4605 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4606 if (use_vwgt) { 4607 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4608 v_wgt[0] = local_size; 4609 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4610 } 4611 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4612 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4613 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4614 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4615 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4616 4617 /* renumber new_ranks to avoid "holes" in new set of processors */ 4618 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4619 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4620 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4621 if (!redprocs) { 4622 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4623 } else { 4624 PetscInt idxs[1]; 4625 PetscMPIInt tag; 4626 MPI_Request *reqs; 4627 4628 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4629 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4630 for (i=rstart;i<rend;i++) { 4631 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4632 } 4633 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4634 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4635 ierr = PetscFree(reqs);CHKERRQ(ierr); 4636 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4637 } 4638 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4639 /* clean up */ 4640 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4641 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4642 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4643 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4644 } 4645 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4646 4647 /* assemble parallel IS for sends */ 4648 i = 1; 4649 if (color) i=0; 4650 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4651 /* get back IS */ 4652 *is_sends = ranks_send_to; 4653 PetscFunctionReturn(0); 4654 } 4655 4656 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4657 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "MatISSubassemble" 4660 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4661 { 4662 Mat local_mat; 4663 IS is_sends_internal; 4664 PetscInt rows,cols,new_local_rows; 4665 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4666 PetscBool ismatis,isdense,newisdense,destroy_mat; 4667 ISLocalToGlobalMapping l2gmap; 4668 PetscInt* l2gmap_indices; 4669 const PetscInt* is_indices; 4670 MatType new_local_type; 4671 /* buffers */ 4672 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4673 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4674 PetscInt *recv_buffer_idxs_local; 4675 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4676 /* MPI */ 4677 MPI_Comm comm,comm_n; 4678 PetscSubcomm subcomm; 4679 PetscMPIInt n_sends,n_recvs,commsize; 4680 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4681 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4682 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4683 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4684 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4685 PetscErrorCode ierr; 4686 4687 PetscFunctionBegin; 4688 /* TODO: add missing checks */ 4689 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4690 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4691 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4692 PetscValidLogicalCollectiveInt(mat,nis,7); 4693 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4694 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4695 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4696 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4697 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4698 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4699 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4700 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4701 PetscInt mrows,mcols,mnrows,mncols; 4702 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4703 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4704 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4705 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4706 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4707 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4708 } 4709 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4710 PetscValidLogicalCollectiveInt(mat,bs,0); 4711 /* prepare IS for sending if not provided */ 4712 if (!is_sends) { 4713 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4714 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4715 } else { 4716 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4717 is_sends_internal = is_sends; 4718 } 4719 4720 /* get comm */ 4721 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4722 4723 /* compute number of sends */ 4724 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4725 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4726 4727 /* compute number of receives */ 4728 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4729 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4730 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4731 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4732 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4733 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4734 ierr = PetscFree(iflags);CHKERRQ(ierr); 4735 4736 /* restrict comm if requested */ 4737 subcomm = 0; 4738 destroy_mat = PETSC_FALSE; 4739 if (restrict_comm) { 4740 PetscMPIInt color,subcommsize; 4741 4742 color = 0; 4743 if (restrict_full) { 4744 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4745 } else { 4746 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4747 } 4748 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4749 subcommsize = commsize - subcommsize; 4750 /* check if reuse has been requested */ 4751 if (reuse == MAT_REUSE_MATRIX) { 4752 if (*mat_n) { 4753 PetscMPIInt subcommsize2; 4754 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4755 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4756 comm_n = PetscObjectComm((PetscObject)*mat_n); 4757 } else { 4758 comm_n = PETSC_COMM_SELF; 4759 } 4760 } else { /* MAT_INITIAL_MATRIX */ 4761 PetscMPIInt rank; 4762 4763 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4764 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4765 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4766 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4767 comm_n = PetscSubcommChild(subcomm); 4768 } 4769 /* flag to destroy *mat_n if not significative */ 4770 if (color) destroy_mat = PETSC_TRUE; 4771 } else { 4772 comm_n = comm; 4773 } 4774 4775 /* prepare send/receive buffers */ 4776 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4777 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4778 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4779 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4780 if (nis) { 4781 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4782 } 4783 4784 /* Get data from local matrices */ 4785 if (!isdense) { 4786 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4787 /* TODO: See below some guidelines on how to prepare the local buffers */ 4788 /* 4789 send_buffer_vals should contain the raw values of the local matrix 4790 send_buffer_idxs should contain: 4791 - MatType_PRIVATE type 4792 - PetscInt size_of_l2gmap 4793 - PetscInt global_row_indices[size_of_l2gmap] 4794 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4795 */ 4796 } else { 4797 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4798 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4799 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4800 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4801 send_buffer_idxs[1] = i; 4802 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4803 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4804 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4805 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4806 for (i=0;i<n_sends;i++) { 4807 ilengths_vals[is_indices[i]] = len*len; 4808 ilengths_idxs[is_indices[i]] = len+2; 4809 } 4810 } 4811 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4812 /* additional is (if any) */ 4813 if (nis) { 4814 PetscMPIInt psum; 4815 PetscInt j; 4816 for (j=0,psum=0;j<nis;j++) { 4817 PetscInt plen; 4818 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4819 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4820 psum += len+1; /* indices + lenght */ 4821 } 4822 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4823 for (j=0,psum=0;j<nis;j++) { 4824 PetscInt plen; 4825 const PetscInt *is_array_idxs; 4826 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4827 send_buffer_idxs_is[psum] = plen; 4828 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4829 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4830 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4831 psum += plen+1; /* indices + lenght */ 4832 } 4833 for (i=0;i<n_sends;i++) { 4834 ilengths_idxs_is[is_indices[i]] = psum; 4835 } 4836 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4837 } 4838 4839 buf_size_idxs = 0; 4840 buf_size_vals = 0; 4841 buf_size_idxs_is = 0; 4842 for (i=0;i<n_recvs;i++) { 4843 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4844 buf_size_vals += (PetscInt)olengths_vals[i]; 4845 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4846 } 4847 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4848 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4849 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4850 4851 /* get new tags for clean communications */ 4852 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4853 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4854 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4855 4856 /* allocate for requests */ 4857 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4858 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4859 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4860 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4861 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4862 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4863 4864 /* communications */ 4865 ptr_idxs = recv_buffer_idxs; 4866 ptr_vals = recv_buffer_vals; 4867 ptr_idxs_is = recv_buffer_idxs_is; 4868 for (i=0;i<n_recvs;i++) { 4869 source_dest = onodes[i]; 4870 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4871 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4872 ptr_idxs += olengths_idxs[i]; 4873 ptr_vals += olengths_vals[i]; 4874 if (nis) { 4875 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4876 ptr_idxs_is += olengths_idxs_is[i]; 4877 } 4878 } 4879 for (i=0;i<n_sends;i++) { 4880 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4881 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4882 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4883 if (nis) { 4884 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4885 } 4886 } 4887 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4888 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4889 4890 /* assemble new l2g map */ 4891 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4892 ptr_idxs = recv_buffer_idxs; 4893 new_local_rows = 0; 4894 for (i=0;i<n_recvs;i++) { 4895 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4896 ptr_idxs += olengths_idxs[i]; 4897 } 4898 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4899 ptr_idxs = recv_buffer_idxs; 4900 new_local_rows = 0; 4901 for (i=0;i<n_recvs;i++) { 4902 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4903 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4904 ptr_idxs += olengths_idxs[i]; 4905 } 4906 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4907 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4908 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4909 4910 /* infer new local matrix type from received local matrices type */ 4911 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4912 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4913 if (n_recvs) { 4914 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4915 ptr_idxs = recv_buffer_idxs; 4916 for (i=0;i<n_recvs;i++) { 4917 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4918 new_local_type_private = MATAIJ_PRIVATE; 4919 break; 4920 } 4921 ptr_idxs += olengths_idxs[i]; 4922 } 4923 switch (new_local_type_private) { 4924 case MATDENSE_PRIVATE: 4925 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4926 new_local_type = MATSEQAIJ; 4927 bs = 1; 4928 } else { /* if I receive only 1 dense matrix */ 4929 new_local_type = MATSEQDENSE; 4930 bs = 1; 4931 } 4932 break; 4933 case MATAIJ_PRIVATE: 4934 new_local_type = MATSEQAIJ; 4935 bs = 1; 4936 break; 4937 case MATBAIJ_PRIVATE: 4938 new_local_type = MATSEQBAIJ; 4939 break; 4940 case MATSBAIJ_PRIVATE: 4941 new_local_type = MATSEQSBAIJ; 4942 break; 4943 default: 4944 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4945 break; 4946 } 4947 } else { /* by default, new_local_type is seqdense */ 4948 new_local_type = MATSEQDENSE; 4949 bs = 1; 4950 } 4951 4952 /* create MATIS object if needed */ 4953 if (reuse == MAT_INITIAL_MATRIX) { 4954 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4955 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4956 } else { 4957 /* it also destroys the local matrices */ 4958 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4959 } 4960 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4961 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4962 4963 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4964 4965 /* Global to local map of received indices */ 4966 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4967 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4968 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4969 4970 /* restore attributes -> type of incoming data and its size */ 4971 buf_size_idxs = 0; 4972 for (i=0;i<n_recvs;i++) { 4973 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4974 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4975 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4976 } 4977 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4978 4979 /* set preallocation */ 4980 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4981 if (!newisdense) { 4982 PetscInt *new_local_nnz=0; 4983 4984 ptr_vals = recv_buffer_vals; 4985 ptr_idxs = recv_buffer_idxs_local; 4986 if (n_recvs) { 4987 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4988 } 4989 for (i=0;i<n_recvs;i++) { 4990 PetscInt j; 4991 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4992 for (j=0;j<*(ptr_idxs+1);j++) { 4993 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4994 } 4995 } else { 4996 /* TODO */ 4997 } 4998 ptr_idxs += olengths_idxs[i]; 4999 } 5000 if (new_local_nnz) { 5001 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5002 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5003 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5004 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5005 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5006 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5007 } else { 5008 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5009 } 5010 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5011 } else { 5012 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5013 } 5014 5015 /* set values */ 5016 ptr_vals = recv_buffer_vals; 5017 ptr_idxs = recv_buffer_idxs_local; 5018 for (i=0;i<n_recvs;i++) { 5019 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5020 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5021 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5022 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5023 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5024 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5025 } else { 5026 /* TODO */ 5027 } 5028 ptr_idxs += olengths_idxs[i]; 5029 ptr_vals += olengths_vals[i]; 5030 } 5031 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5032 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5033 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5034 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5035 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5036 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5037 5038 #if 0 5039 if (!restrict_comm) { /* check */ 5040 Vec lvec,rvec; 5041 PetscReal infty_error; 5042 5043 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5044 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5045 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5046 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5047 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5048 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5049 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5050 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5051 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5052 } 5053 #endif 5054 5055 /* assemble new additional is (if any) */ 5056 if (nis) { 5057 PetscInt **temp_idxs,*count_is,j,psum; 5058 5059 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5060 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5061 ptr_idxs = recv_buffer_idxs_is; 5062 psum = 0; 5063 for (i=0;i<n_recvs;i++) { 5064 for (j=0;j<nis;j++) { 5065 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5066 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5067 psum += plen; 5068 ptr_idxs += plen+1; /* shift pointer to received data */ 5069 } 5070 } 5071 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5072 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5073 for (i=1;i<nis;i++) { 5074 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5075 } 5076 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5077 ptr_idxs = recv_buffer_idxs_is; 5078 for (i=0;i<n_recvs;i++) { 5079 for (j=0;j<nis;j++) { 5080 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5081 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5082 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5083 ptr_idxs += plen+1; /* shift pointer to received data */ 5084 } 5085 } 5086 for (i=0;i<nis;i++) { 5087 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5088 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5089 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5090 } 5091 ierr = PetscFree(count_is);CHKERRQ(ierr); 5092 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5093 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5094 } 5095 /* free workspace */ 5096 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5097 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5098 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5099 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5100 if (isdense) { 5101 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5102 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5103 } else { 5104 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5105 } 5106 if (nis) { 5107 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5108 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5109 } 5110 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5111 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5112 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5113 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5114 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5115 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5116 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5117 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5118 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5119 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5120 ierr = PetscFree(onodes);CHKERRQ(ierr); 5121 if (nis) { 5122 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5123 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5124 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5125 } 5126 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5127 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5128 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5129 for (i=0;i<nis;i++) { 5130 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5131 } 5132 *mat_n = NULL; 5133 } 5134 PetscFunctionReturn(0); 5135 } 5136 5137 /* temporary hack into ksp private data structure */ 5138 #include <petsc/private/kspimpl.h> 5139 5140 #undef __FUNCT__ 5141 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5142 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5143 { 5144 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5145 PC_IS *pcis = (PC_IS*)pc->data; 5146 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5147 MatNullSpace CoarseNullSpace=NULL; 5148 ISLocalToGlobalMapping coarse_islg; 5149 IS coarse_is,*isarray; 5150 PetscInt i,im_active=-1,active_procs=-1; 5151 PetscInt nis,nisdofs,nisneu,nisvert; 5152 PC pc_temp; 5153 PCType coarse_pc_type; 5154 KSPType coarse_ksp_type; 5155 PetscBool multilevel_requested,multilevel_allowed; 5156 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5157 Mat t_coarse_mat_is; 5158 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5159 PetscMPIInt all_procs; 5160 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5161 PetscBool compute_vecs = PETSC_FALSE; 5162 PetscScalar *array; 5163 PetscErrorCode ierr; 5164 5165 PetscFunctionBegin; 5166 /* Assign global numbering to coarse dofs */ 5167 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5168 PetscInt ocoarse_size; 5169 compute_vecs = PETSC_TRUE; 5170 ocoarse_size = pcbddc->coarse_size; 5171 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5172 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5173 /* see if we can avoid some work */ 5174 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5175 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5176 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5177 PC pc; 5178 PetscBool isbddc; 5179 5180 /* temporary workaround since PCBDDC does not have a reset method so far */ 5181 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5182 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5183 if (isbddc) { 5184 ierr = PCDestroy(&pc);CHKERRQ(ierr); 5185 } 5186 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5187 coarse_reuse = PETSC_FALSE; 5188 } else { /* we can safely reuse already computed coarse matrix */ 5189 coarse_reuse = PETSC_TRUE; 5190 } 5191 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5192 coarse_reuse = PETSC_FALSE; 5193 } 5194 /* reset any subassembling information */ 5195 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5196 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5197 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5198 coarse_reuse = PETSC_TRUE; 5199 } 5200 5201 /* count "active" (i.e. with positive local size) and "void" processes */ 5202 im_active = !!(pcis->n); 5203 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5204 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5205 void_procs = all_procs-active_procs; 5206 csin_type_simple = PETSC_TRUE; 5207 redist = PETSC_FALSE; 5208 if (pcbddc->current_level && void_procs) { 5209 csin_ml = PETSC_TRUE; 5210 ncoarse_ml = void_procs; 5211 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5212 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5213 csin_ds = PETSC_TRUE; 5214 ncoarse_ds = pcbddc->redistribute_coarse; 5215 redist = PETSC_TRUE; 5216 } else { 5217 csin_ds = PETSC_TRUE; 5218 ncoarse_ds = active_procs; 5219 redist = PETSC_TRUE; 5220 } 5221 } else { 5222 csin_ml = PETSC_FALSE; 5223 ncoarse_ml = all_procs; 5224 if (void_procs) { 5225 csin_ds = PETSC_TRUE; 5226 ncoarse_ds = void_procs; 5227 csin_type_simple = PETSC_FALSE; 5228 } else { 5229 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5230 csin_ds = PETSC_TRUE; 5231 ncoarse_ds = pcbddc->redistribute_coarse; 5232 redist = PETSC_TRUE; 5233 } else { 5234 csin_ds = PETSC_FALSE; 5235 ncoarse_ds = all_procs; 5236 } 5237 } 5238 } 5239 5240 /* 5241 test if we can go multilevel: three conditions must be satisfied: 5242 - we have not exceeded the number of levels requested 5243 - we can actually subassemble the active processes 5244 - we can find a suitable number of MPI processes where we can place the subassembled problem 5245 */ 5246 multilevel_allowed = PETSC_FALSE; 5247 multilevel_requested = PETSC_FALSE; 5248 if (pcbddc->current_level < pcbddc->max_levels) { 5249 multilevel_requested = PETSC_TRUE; 5250 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5251 multilevel_allowed = PETSC_FALSE; 5252 } else { 5253 multilevel_allowed = PETSC_TRUE; 5254 } 5255 } 5256 /* determine number of process partecipating to coarse solver */ 5257 if (multilevel_allowed) { 5258 ncoarse = ncoarse_ml; 5259 csin = csin_ml; 5260 redist = PETSC_FALSE; 5261 } else { 5262 ncoarse = ncoarse_ds; 5263 csin = csin_ds; 5264 } 5265 5266 /* creates temporary l2gmap and IS for coarse indexes */ 5267 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5268 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5269 5270 /* creates temporary MATIS object for coarse matrix */ 5271 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5272 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5273 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5274 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5275 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5276 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5277 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5278 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5279 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5280 5281 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5282 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5283 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5284 const PetscInt *idxs; 5285 ISLocalToGlobalMapping tmap; 5286 5287 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5288 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5289 /* allocate space for temporary storage */ 5290 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5291 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5292 /* allocate for IS array */ 5293 nisdofs = pcbddc->n_ISForDofsLocal; 5294 nisneu = !!pcbddc->NeumannBoundariesLocal; 5295 nisvert = 0; 5296 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5297 nisvert = 1; 5298 } 5299 nis = nisdofs + nisneu + nisvert; 5300 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5301 /* dofs splitting */ 5302 for (i=0;i<nisdofs;i++) { 5303 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5304 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5305 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5306 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5307 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5308 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5309 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5310 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5311 } 5312 /* neumann boundaries */ 5313 if (pcbddc->NeumannBoundariesLocal) { 5314 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5315 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5316 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5317 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5318 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5319 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5320 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5321 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5322 } 5323 /* primal vertices (benign) */ 5324 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5325 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5326 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5327 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5328 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5329 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5330 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5331 } 5332 /* free memory */ 5333 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5334 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5335 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5336 } else { 5337 nis = 0; 5338 nisdofs = 0; 5339 nisneu = 0; 5340 nisvert = 0; 5341 isarray = NULL; 5342 } 5343 /* destroy no longer needed map */ 5344 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5345 5346 /* restrict on coarse candidates (if needed) */ 5347 coarse_mat_is = NULL; 5348 if (csin) { 5349 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5350 if (redist) { 5351 PetscMPIInt rank; 5352 PetscInt spc,n_spc_p1,dest[1],destsize; 5353 5354 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5355 spc = active_procs/ncoarse; 5356 n_spc_p1 = active_procs%ncoarse; 5357 if (im_active) { 5358 destsize = 1; 5359 if (rank > n_spc_p1*(spc+1)-1) { 5360 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5361 } else { 5362 dest[0] = rank/(spc+1); 5363 } 5364 } else { 5365 destsize = 0; 5366 } 5367 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5368 } else if (csin_type_simple) { 5369 PetscMPIInt rank; 5370 PetscInt issize,isidx; 5371 5372 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5373 if (im_active) { 5374 issize = 1; 5375 isidx = (PetscInt)rank; 5376 } else { 5377 issize = 0; 5378 isidx = -1; 5379 } 5380 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5381 } else { /* get a suitable subassembling pattern from MATIS code */ 5382 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5383 } 5384 5385 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5386 if (!redist || ncoarse <= void_procs) { 5387 PetscInt ncoarse_cand,tissize,*nisindices; 5388 PetscInt *coarse_candidates; 5389 const PetscInt* tisindices; 5390 5391 /* get coarse candidates' ranks in pc communicator */ 5392 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5393 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5394 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5395 if (!coarse_candidates[i]) { 5396 coarse_candidates[ncoarse_cand++]=i; 5397 } 5398 } 5399 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5400 5401 5402 if (pcbddc->dbg_flag) { 5403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5404 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5405 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5407 for (i=0;i<ncoarse_cand;i++) { 5408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5409 } 5410 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5412 } 5413 /* shift the pattern on coarse candidates */ 5414 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5415 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5416 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5417 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5418 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5419 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5420 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5421 } 5422 if (pcbddc->dbg_flag) { 5423 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5425 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5426 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5427 } 5428 } 5429 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5430 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5431 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5432 } else { /* this is the last level, so use just receiving processes in subcomm */ 5433 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5434 } 5435 } else { 5436 if (pcbddc->dbg_flag) { 5437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5440 } 5441 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5442 coarse_mat_is = t_coarse_mat_is; 5443 } 5444 5445 /* create local to global scatters for coarse problem */ 5446 if (compute_vecs) { 5447 PetscInt lrows; 5448 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5449 if (coarse_mat_is) { 5450 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5451 } else { 5452 lrows = 0; 5453 } 5454 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5455 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5456 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5457 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5458 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5459 } 5460 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5461 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5462 5463 /* set defaults for coarse KSP and PC */ 5464 if (multilevel_allowed) { 5465 coarse_ksp_type = KSPRICHARDSON; 5466 coarse_pc_type = PCBDDC; 5467 } else { 5468 coarse_ksp_type = KSPPREONLY; 5469 coarse_pc_type = PCREDUNDANT; 5470 } 5471 5472 /* print some info if requested */ 5473 if (pcbddc->dbg_flag) { 5474 if (!multilevel_allowed) { 5475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5476 if (multilevel_requested) { 5477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5478 } else if (pcbddc->max_levels) { 5479 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5480 } 5481 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5482 } 5483 } 5484 5485 /* create the coarse KSP object only once with defaults */ 5486 if (coarse_mat_is) { 5487 MatReuse coarse_mat_reuse; 5488 PetscViewer dbg_viewer = NULL; 5489 if (pcbddc->dbg_flag) { 5490 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5491 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5492 } 5493 if (!pcbddc->coarse_ksp) { 5494 char prefix[256],str_level[16]; 5495 size_t len; 5496 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5497 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5498 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5499 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5500 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5501 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5502 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5503 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5504 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5505 /* prefix */ 5506 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5507 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5508 if (!pcbddc->current_level) { 5509 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5510 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5511 } else { 5512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5513 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5514 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5515 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5516 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5517 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5518 } 5519 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5520 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5521 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5522 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5523 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5524 /* allow user customization */ 5525 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5526 } 5527 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5528 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5529 if (nisdofs) { 5530 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5531 for (i=0;i<nisdofs;i++) { 5532 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5533 } 5534 } 5535 if (nisneu) { 5536 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5537 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5538 } 5539 if (nisvert) { 5540 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5541 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5542 } 5543 5544 /* get some info after set from options */ 5545 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5546 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5547 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5548 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5549 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5550 isbddc = PETSC_FALSE; 5551 } 5552 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5553 if (isredundant) { 5554 KSP inner_ksp; 5555 PC inner_pc; 5556 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5557 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5558 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5559 } 5560 5561 /* assemble coarse matrix */ 5562 if (coarse_reuse) { 5563 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5564 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5565 coarse_mat_reuse = MAT_REUSE_MATRIX; 5566 } else { 5567 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5568 } 5569 if (isbddc || isnn) { 5570 if (isbddc) { /* currently there's no API for this */ 5571 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5572 pcbddc->detect_disconnected = PETSC_TRUE; 5573 } 5574 if (pcbddc->coarsening_ratio > 1) { 5575 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5576 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5577 if (pcbddc->dbg_flag) { 5578 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5579 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5580 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5581 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5582 } 5583 } 5584 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5585 } else { 5586 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5587 coarse_mat = coarse_mat_is; 5588 } 5589 } else { 5590 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5591 } 5592 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5593 5594 /* propagate symmetry info of coarse matrix */ 5595 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5596 if (pc->pmat->symmetric_set) { 5597 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5598 } 5599 if (pc->pmat->hermitian_set) { 5600 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5601 } 5602 if (pc->pmat->spd_set) { 5603 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5604 } 5605 /* set operators */ 5606 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5607 if (pcbddc->dbg_flag) { 5608 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5609 } 5610 } else { /* processes non partecipating to coarse solver (if any) */ 5611 coarse_mat = 0; 5612 } 5613 ierr = PetscFree(isarray);CHKERRQ(ierr); 5614 #if 0 5615 { 5616 PetscViewer viewer; 5617 char filename[256]; 5618 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5619 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5620 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5621 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5622 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5623 } 5624 #endif 5625 5626 /* Compute coarse null space (special handling by BDDC only) */ 5627 #if 0 5628 if (pcbddc->NullSpace) { 5629 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5630 } 5631 #endif 5632 /* hack */ 5633 if (pcbddc->coarse_ksp) { 5634 Vec crhs,csol; 5635 5636 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5637 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5638 if (!csol) { 5639 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5640 } 5641 if (!crhs) { 5642 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5643 } 5644 } 5645 5646 /* compute null space for coarse solver if the benign trick has been requested */ 5647 if (pcbddc->benign_null) { 5648 5649 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5650 for (i=0;i<pcbddc->benign_n;i++) { 5651 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5652 } 5653 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5654 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5655 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5656 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5657 if (coarse_mat) { 5658 Vec nullv; 5659 PetscScalar *array,*array2; 5660 PetscInt nl; 5661 5662 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5663 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5664 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5665 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5666 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5667 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5668 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5669 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5670 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5671 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5672 } 5673 } 5674 5675 if (pcbddc->coarse_ksp) { 5676 PetscBool ispreonly; 5677 5678 if (CoarseNullSpace) { 5679 PetscBool isnull; 5680 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5681 if (0) { 5682 if (isbddc && !pcbddc->benign_saddle_point) { 5683 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5684 } else { 5685 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5686 } 5687 } else { 5688 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5689 } 5690 } 5691 /* setup coarse ksp */ 5692 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5693 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5694 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5695 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5696 KSP check_ksp; 5697 KSPType check_ksp_type; 5698 PC check_pc; 5699 Vec check_vec,coarse_vec; 5700 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5701 PetscInt its; 5702 PetscBool compute_eigs; 5703 PetscReal *eigs_r,*eigs_c; 5704 PetscInt neigs; 5705 const char *prefix; 5706 5707 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5708 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5709 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5710 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5711 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5712 if (ispreonly) { 5713 check_ksp_type = KSPPREONLY; 5714 compute_eigs = PETSC_FALSE; 5715 } else { 5716 check_ksp_type = KSPGMRES; 5717 compute_eigs = PETSC_TRUE; 5718 } 5719 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5720 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5721 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5722 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5723 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5724 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5725 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5726 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5727 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5728 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5729 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5730 /* create random vec */ 5731 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5732 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5733 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5734 if (CoarseNullSpace) { 5735 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5736 } 5737 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5738 /* solve coarse problem */ 5739 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5740 if (CoarseNullSpace) { 5741 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5742 } 5743 /* set eigenvalue estimation if preonly has not been requested */ 5744 if (compute_eigs) { 5745 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5746 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5747 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5748 lambda_max = eigs_r[neigs-1]; 5749 lambda_min = eigs_r[0]; 5750 if (pcbddc->use_coarse_estimates) { 5751 if (lambda_max>lambda_min) { 5752 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5753 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5754 } 5755 } 5756 } 5757 5758 /* check coarse problem residual error */ 5759 if (pcbddc->dbg_flag) { 5760 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5761 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5762 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5763 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5764 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5765 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5766 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5767 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5768 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5769 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5770 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5771 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5772 if (CoarseNullSpace) { 5773 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5774 } 5775 if (compute_eigs) { 5776 PetscReal lambda_max_s,lambda_min_s; 5777 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5778 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5779 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5780 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5781 for (i=0;i<neigs;i++) { 5782 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5783 } 5784 } 5785 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5786 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5787 } 5788 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5789 if (compute_eigs) { 5790 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5791 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5792 } 5793 } 5794 } 5795 /* print additional info */ 5796 if (pcbddc->dbg_flag) { 5797 /* waits until all processes reaches this point */ 5798 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5799 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5800 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5801 } 5802 5803 /* free memory */ 5804 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5805 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5806 PetscFunctionReturn(0); 5807 } 5808 5809 #undef __FUNCT__ 5810 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5811 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5812 { 5813 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5814 PC_IS* pcis = (PC_IS*)pc->data; 5815 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5816 IS subset,subset_mult,subset_n; 5817 PetscInt local_size,coarse_size=0; 5818 PetscInt *local_primal_indices=NULL; 5819 const PetscInt *t_local_primal_indices; 5820 PetscErrorCode ierr; 5821 5822 PetscFunctionBegin; 5823 /* Compute global number of coarse dofs */ 5824 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5825 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5826 } 5827 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5828 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5829 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5830 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5831 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5832 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5833 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5834 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5835 if (local_size != pcbddc->local_primal_size) { 5836 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5837 } 5838 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5839 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5840 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5841 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5842 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5843 5844 /* check numbering */ 5845 if (pcbddc->dbg_flag) { 5846 PetscScalar coarsesum,*array,*array2; 5847 PetscInt i; 5848 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5849 5850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5852 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5853 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5854 /* counter */ 5855 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5856 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5857 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5858 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5859 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5860 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5861 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5862 for (i=0;i<pcbddc->local_primal_size;i++) { 5863 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5864 } 5865 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5866 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5867 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5868 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5869 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5870 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5871 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5872 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5873 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5874 for (i=0;i<pcis->n;i++) { 5875 if (array[i] != 0.0 && array[i] != array2[i]) { 5876 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5877 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5878 set_error = PETSC_TRUE; 5879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5880 } 5881 } 5882 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5883 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5884 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5885 for (i=0;i<pcis->n;i++) { 5886 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5887 } 5888 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5889 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5890 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5891 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5892 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5893 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5894 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5895 PetscInt *gidxs; 5896 5897 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5898 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5902 for (i=0;i<pcbddc->local_primal_size;i++) { 5903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5904 } 5905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5906 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5907 } 5908 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5909 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5910 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5911 } 5912 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5913 /* get back data */ 5914 *coarse_size_n = coarse_size; 5915 *local_primal_indices_n = local_primal_indices; 5916 PetscFunctionReturn(0); 5917 } 5918 5919 #undef __FUNCT__ 5920 #define __FUNCT__ "PCBDDCGlobalToLocal" 5921 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5922 { 5923 IS localis_t; 5924 PetscInt i,lsize,*idxs,n; 5925 PetscScalar *vals; 5926 PetscErrorCode ierr; 5927 5928 PetscFunctionBegin; 5929 /* get indices in local ordering exploiting local to global map */ 5930 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5931 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5932 for (i=0;i<lsize;i++) vals[i] = 1.0; 5933 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5934 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5935 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5936 if (idxs) { /* multilevel guard */ 5937 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5938 } 5939 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5940 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5941 ierr = PetscFree(vals);CHKERRQ(ierr); 5942 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5943 /* now compute set in local ordering */ 5944 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5945 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5946 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5947 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5948 for (i=0,lsize=0;i<n;i++) { 5949 if (PetscRealPart(vals[i]) > 0.5) { 5950 lsize++; 5951 } 5952 } 5953 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5954 for (i=0,lsize=0;i<n;i++) { 5955 if (PetscRealPart(vals[i]) > 0.5) { 5956 idxs[lsize++] = i; 5957 } 5958 } 5959 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5960 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5961 *localis = localis_t; 5962 PetscFunctionReturn(0); 5963 } 5964 5965 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5966 #undef __FUNCT__ 5967 #define __FUNCT__ "PCBDDCMatMult_Private" 5968 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5969 { 5970 PCBDDCChange_ctx change_ctx; 5971 PetscErrorCode ierr; 5972 5973 PetscFunctionBegin; 5974 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5975 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5976 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5977 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5978 PetscFunctionReturn(0); 5979 } 5980 5981 #undef __FUNCT__ 5982 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5983 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5984 { 5985 PCBDDCChange_ctx change_ctx; 5986 PetscErrorCode ierr; 5987 5988 PetscFunctionBegin; 5989 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5990 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5991 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5992 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5993 PetscFunctionReturn(0); 5994 } 5995 5996 #undef __FUNCT__ 5997 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5998 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5999 { 6000 PC_IS *pcis=(PC_IS*)pc->data; 6001 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6002 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6003 Mat S_j; 6004 PetscInt *used_xadj,*used_adjncy; 6005 PetscBool free_used_adj; 6006 PetscErrorCode ierr; 6007 6008 PetscFunctionBegin; 6009 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6010 free_used_adj = PETSC_FALSE; 6011 if (pcbddc->sub_schurs_layers == -1) { 6012 used_xadj = NULL; 6013 used_adjncy = NULL; 6014 } else { 6015 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6016 used_xadj = pcbddc->mat_graph->xadj; 6017 used_adjncy = pcbddc->mat_graph->adjncy; 6018 } else if (pcbddc->computed_rowadj) { 6019 used_xadj = pcbddc->mat_graph->xadj; 6020 used_adjncy = pcbddc->mat_graph->adjncy; 6021 } else { 6022 PetscBool flg_row=PETSC_FALSE; 6023 const PetscInt *xadj,*adjncy; 6024 PetscInt nvtxs; 6025 6026 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6027 if (flg_row) { 6028 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6029 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6030 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6031 free_used_adj = PETSC_TRUE; 6032 } else { 6033 pcbddc->sub_schurs_layers = -1; 6034 used_xadj = NULL; 6035 used_adjncy = NULL; 6036 } 6037 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6038 } 6039 } 6040 6041 /* setup sub_schurs data */ 6042 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6043 if (!sub_schurs->schur_explicit) { 6044 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6045 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6046 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6047 } else { 6048 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6049 PetscBool isseqaij; 6050 PetscInt benign_n; 6051 6052 if (!pcbddc->use_vertices && reuse_solvers) { 6053 PetscInt n_vertices; 6054 6055 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6056 reuse_solvers = (PetscBool)!n_vertices; 6057 } 6058 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6059 if (!isseqaij) { 6060 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6061 if (matis->A == pcbddc->local_mat) { 6062 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6063 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6064 } else { 6065 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6066 } 6067 } 6068 if (!pcbddc->benign_change_explicit) { 6069 benign_n = pcbddc->benign_n; 6070 } else { 6071 benign_n = 0; 6072 } 6073 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6074 } 6075 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6076 6077 /* free adjacency */ 6078 if (free_used_adj) { 6079 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6080 } 6081 PetscFunctionReturn(0); 6082 } 6083 6084 #undef __FUNCT__ 6085 #define __FUNCT__ "PCBDDCInitSubSchurs" 6086 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6087 { 6088 PC_IS *pcis=(PC_IS*)pc->data; 6089 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6090 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6091 PCBDDCGraph graph; 6092 PetscErrorCode ierr; 6093 6094 PetscFunctionBegin; 6095 /* attach interface graph for determining subsets */ 6096 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6097 IS verticesIS,verticescomm; 6098 PetscInt vsize,*idxs; 6099 6100 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6101 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6102 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6104 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6105 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6106 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6107 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6108 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6109 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6110 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6111 /* 6112 if (pcbddc->dbg_flag) { 6113 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6114 } 6115 */ 6116 } else { 6117 graph = pcbddc->mat_graph; 6118 } 6119 6120 /* sub_schurs init */ 6121 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6122 6123 /* free graph struct */ 6124 if (pcbddc->sub_schurs_rebuild) { 6125 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6126 } 6127 PetscFunctionReturn(0); 6128 } 6129 6130 #undef __FUNCT__ 6131 #define __FUNCT__ "PCBDDCCheckOperator" 6132 PetscErrorCode PCBDDCCheckOperator(PC pc) 6133 { 6134 PC_IS *pcis=(PC_IS*)pc->data; 6135 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6136 PetscErrorCode ierr; 6137 6138 PetscFunctionBegin; 6139 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6140 IS zerodiag = NULL; 6141 Mat S_j,B0_B=NULL; 6142 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6143 PetscScalar *p0_check,*array,*array2; 6144 PetscReal norm; 6145 PetscInt i; 6146 6147 /* B0 and B0_B */ 6148 if (zerodiag) { 6149 IS dummy; 6150 6151 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6152 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6153 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6154 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6155 } 6156 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6157 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6158 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6159 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6160 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6161 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6162 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6163 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6164 /* S_j */ 6165 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6166 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6167 6168 /* mimic vector in \widetilde{W}_\Gamma */ 6169 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6170 /* continuous in primal space */ 6171 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6172 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6173 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6174 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6175 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6176 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6177 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6178 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6179 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6180 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6181 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6182 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6183 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6184 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6185 6186 /* assemble rhs for coarse problem */ 6187 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6188 /* local with Schur */ 6189 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6190 if (zerodiag) { 6191 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6192 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6193 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6194 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6195 } 6196 /* sum on primal nodes the local contributions */ 6197 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6198 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6199 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6200 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6201 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6202 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6203 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6204 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6205 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6206 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6207 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6208 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6209 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6210 /* scale primal nodes (BDDC sums contibutions) */ 6211 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6212 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6213 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6214 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6215 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6216 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6217 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6218 /* global: \widetilde{B0}_B w_\Gamma */ 6219 if (zerodiag) { 6220 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6221 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6222 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6223 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6224 } 6225 /* BDDC */ 6226 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6227 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6228 6229 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6230 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6231 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6232 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6233 for (i=0;i<pcbddc->benign_n;i++) { 6234 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6235 } 6236 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6237 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6238 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6239 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6240 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6241 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6242 } 6243 PetscFunctionReturn(0); 6244 } 6245