1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 290 if (!isseqaij && filter) { 291 PetscBool isseqdense; 292 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 294 if (!isseqdense) { 295 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 296 } else { /* TODO: rectangular case and LDA */ 297 PetscScalar *array; 298 PetscReal chop=1.e-6; 299 300 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 301 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 302 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 303 for (i=0;i<n;i++) { 304 PetscInt j; 305 for (j=i+1;j<n;j++) { 306 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 307 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 308 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 309 } 310 } 311 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 312 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 313 } 314 } else { 315 B = A; 316 } 317 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 318 319 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 320 if (filter) { 321 PetscScalar *data; 322 PetscInt j,cum; 323 324 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 325 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 326 cum = 0; 327 for (i=0;i<n;i++) { 328 PetscInt t; 329 330 for (j=xadj[i];j<xadj[i+1];j++) { 331 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 332 continue; 333 } 334 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 335 } 336 t = xadj_filtered[i]; 337 xadj_filtered[i] = cum; 338 cum += t; 339 } 340 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 341 } else { 342 xadj_filtered = NULL; 343 adjncy_filtered = NULL; 344 } 345 346 /* compute local connected components using PCBDDCGraph */ 347 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 349 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 350 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 351 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 353 if (xadj_filtered) { 354 graph->xadj = xadj_filtered; 355 graph->adjncy = adjncy_filtered; 356 } else { 357 graph->xadj = xadj; 358 graph->adjncy = adjncy; 359 } 360 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 361 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 362 /* partial clean up */ 363 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 364 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 365 if (A != B) { 366 ierr = MatDestroy(&B);CHKERRQ(ierr); 367 } 368 369 /* get back data */ 370 if (ncc) *ncc = graph->ncc; 371 if (cc) { 372 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 373 for (i=0;i<graph->ncc;i++) { 374 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 375 } 376 *cc = cc_n; 377 } 378 /* clean up graph */ 379 graph->xadj = 0; 380 graph->adjncy = 0; 381 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCBenignCheck" 387 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 388 { 389 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 390 PC_IS* pcis = (PC_IS*)(pc->data); 391 IS dirIS = NULL; 392 PetscInt i; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 397 if (zerodiag) { 398 Mat A; 399 Vec vec3_N; 400 PetscScalar *vals; 401 const PetscInt *idxs; 402 PetscInt nz; 403 404 /* p0 */ 405 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 406 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 407 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 408 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 409 for (i=0;i<nz;i++) vals[i] = 1.; 410 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 412 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 413 /* v_I */ 414 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 415 for (i=0;i<nz;i++) vals[i] = 0.; 416 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 417 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 419 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 if (dirIS) { 423 PetscInt n; 424 425 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 426 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 427 for (i=0;i<n;i++) vals[i] = 0.; 428 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 430 } 431 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 433 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 434 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 435 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 436 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 437 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 438 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 439 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 440 } 441 ierr = PetscFree(vals);CHKERRQ(ierr); 442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 443 } 444 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 445 446 /* check PCBDDCBenignGetOrSetP0 */ 447 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 448 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 449 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 450 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 451 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 452 for (i=0;i<pcbddc->benign_n;i++) { 453 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 454 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 455 } 456 } 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 462 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 463 { 464 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 465 IS pressures,zerodiag,*zerodiag_subs; 466 PetscInt nz,n; 467 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 472 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 473 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 474 for (n=0;n<pcbddc->benign_n;n++) { 475 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 476 } 477 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 478 pcbddc->benign_n = 0; 479 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 480 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 481 Checks if all the pressure dofs in each subdomain have a zero diagonal 482 If not, a change of basis on pressures is not needed 483 since the local Schur complements are SPD 484 */ 485 has_null_pressures = PETSC_TRUE; 486 have_null = PETSC_TRUE; 487 if (pcbddc->n_ISForDofsLocal) { 488 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 489 490 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 491 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 492 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 493 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 494 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 495 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 496 if (!sorted) { 497 ierr = ISSort(pressures);CHKERRQ(ierr); 498 } 499 } else { 500 pressures = NULL; 501 } 502 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 503 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 504 /* TODO: add check for shared dofs and raise error */ 505 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 506 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 507 if (!sorted) { 508 ierr = ISSort(zerodiag);CHKERRQ(ierr); 509 } 510 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 511 if (!nz) { 512 if (n) have_null = PETSC_FALSE; 513 has_null_pressures = PETSC_FALSE; 514 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 515 } 516 recompute_zerodiag = PETSC_FALSE; 517 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 518 zerodiag_subs = NULL; 519 pcbddc->benign_n = 0; 520 if (has_null_pressures) { 521 IS *subs; 522 PetscInt nsubs,i; 523 524 subs = pcbddc->local_subs; 525 nsubs = pcbddc->n_local_subs; 526 if (nsubs > 1) { 527 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 528 for (i=0;i<nsubs;i++) { 529 ISLocalToGlobalMapping l2g; 530 IS t_zerodiag_subs; 531 PetscInt nl; 532 533 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 534 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 535 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 536 if (nl) { 537 PetscBool valid = PETSC_TRUE; 538 539 if (pressures) { 540 IS t_pressure_subs; 541 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 542 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 543 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 544 } 545 if (valid) { 546 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 547 pcbddc->benign_n++; 548 } else { 549 recompute_zerodiag = PETSC_TRUE; 550 } 551 } 552 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 553 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 554 } 555 } else { /* there's just one subdomain (or zero if they have not been detected */ 556 PetscBool valid = PETSC_TRUE; 557 if (pressures) { 558 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 559 } 560 if (valid) { 561 pcbddc->benign_n = 1; 562 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 563 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 564 zerodiag_subs[0] = zerodiag; 565 } 566 } 567 } 568 569 if (!pcbddc->benign_n) { 570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 571 recompute_zerodiag = PETSC_FALSE; 572 has_null_pressures = PETSC_FALSE; 573 have_null = PETSC_FALSE; 574 } 575 576 /* final check for null pressures */ 577 if (zerodiag && pressures) { 578 PetscInt nz,np; 579 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 580 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 581 if (nz != np) have_null = PETSC_FALSE; 582 } 583 584 if (recompute_zerodiag) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 if (pcbddc->benign_n == 1) { 587 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 588 zerodiag = zerodiag_subs[0]; 589 } else { 590 PetscInt i,nzn,*new_idxs; 591 592 nzn = 0; 593 for (i=0;i<pcbddc->benign_n;i++) { 594 PetscInt ns; 595 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 596 nzn += ns; 597 } 598 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 599 nzn = 0; 600 for (i=0;i<pcbddc->benign_n;i++) { 601 PetscInt ns,*idxs; 602 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 603 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 604 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 605 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 606 nzn += ns; 607 } 608 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 609 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 610 } 611 have_null = PETSC_FALSE; 612 } 613 614 if (has_null_pressures) { 615 IS zerodiagc; 616 const PetscInt *idxs,*idxsc; 617 PetscInt i,s,*nnz; 618 619 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 620 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 622 /* local change of basis for pressures */ 623 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 624 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 625 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 626 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 627 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 628 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 629 for (i=0;i<pcbddc->benign_n;i++) { 630 PetscInt nzs,j; 631 632 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 633 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 634 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 635 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 636 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 637 } 638 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 639 ierr = PetscFree(nnz);CHKERRQ(ierr); 640 /* set identity on velocities */ 641 for (i=0;i<n-nz;i++) { 642 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 643 } 644 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 645 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 646 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 647 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 648 /* set change on pressures */ 649 for (s=0;s<pcbddc->benign_n;s++) { 650 PetscScalar *array; 651 PetscInt nzs; 652 653 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 654 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 655 for (i=0;i<nzs-1;i++) { 656 PetscScalar vals[2]; 657 PetscInt cols[2]; 658 659 cols[0] = idxs[i]; 660 cols[1] = idxs[nzs-1]; 661 vals[0] = 1.; 662 vals[1] = 1.; 663 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 664 } 665 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 666 for (i=0;i<nzs-1;i++) array[i] = -1.; 667 array[nzs-1] = 1.; 668 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 669 /* store local idxs for p0 */ 670 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 671 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 672 ierr = PetscFree(array);CHKERRQ(ierr); 673 } 674 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 675 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 676 /* project if needed */ 677 if (pcbddc->benign_change_explicit) { 678 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 679 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 680 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 681 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 682 } 683 /* store global idxs for p0 */ 684 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 685 } 686 pcbddc->benign_zerodiag_subs = zerodiag_subs; 687 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 688 689 /* determines if the coarse solver will be singular or not */ 690 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 691 *zerodiaglocal = zerodiag; 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 697 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 698 { 699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 if (!pcbddc->benign_sf) { 704 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 705 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 706 } 707 if (get) { /* use SF to get values */ 708 PetscScalar *array; 709 710 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 711 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 712 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 713 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 714 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 715 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 716 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 717 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 724 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 725 { 726 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 /* TODO: add error checking 731 - avoid nested pop (or push) calls. 732 - cannot push before pop. 733 - cannot call this if pcbddc->local_mat is NULL 734 */ 735 if (!pcbddc->benign_n) { 736 PetscFunctionReturn(0); 737 } 738 if (pop) { 739 if (pcbddc->benign_change_explicit) { 740 IS is_p0; 741 MatReuse reuse; 742 743 /* extract B_0 */ 744 reuse = MAT_INITIAL_MATRIX; 745 if (pcbddc->benign_B0) { 746 reuse = MAT_REUSE_MATRIX; 747 } 748 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 749 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 750 /* remove rows and cols from local problem */ 751 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 752 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 753 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 754 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 755 } else { 756 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 757 PetscScalar *vals; 758 PetscInt i,n,*idxs_ins; 759 760 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 761 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 762 if (!pcbddc->benign_B0) { 763 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 764 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 765 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 766 /* this matrix is very sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 767 Setting nnz=100 should be more than enough */ 768 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 769 } 770 771 for (i=0;i<pcbddc->benign_n;i++) { 772 PetscScalar *array; 773 PetscInt *idxs,j,nz,cum; 774 775 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 776 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 777 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 778 for (j=0;j<nz;j++) vals[j] = 1.; 779 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 780 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 781 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 782 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 783 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 784 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 785 cum = 0; 786 for (j=0;j<n;j++) { 787 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 788 vals[cum] = array[j]; 789 idxs_ins[cum] = j; 790 cum++; 791 } 792 } 793 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 794 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 796 } 797 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 798 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 799 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 800 } 801 } else { /* push */ 802 if (pcbddc->benign_change_explicit) { 803 PetscInt i; 804 805 for (i=0;i<pcbddc->benign_n;i++) { 806 PetscScalar *B0_vals; 807 PetscInt *B0_cols,B0_ncol; 808 809 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 810 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 811 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 812 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 813 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 814 } 815 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 816 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817 } else { 818 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 819 } 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCBDDCAdaptiveSelection" 826 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 827 { 828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 829 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 830 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 831 PetscBLASInt *B_iwork,*B_ifail; 832 PetscScalar *work,lwork; 833 PetscScalar *St,*S,*eigv; 834 PetscScalar *Sarray,*Starray; 835 PetscReal *eigs,thresh; 836 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 837 PetscBool allocated_S_St; 838 #if defined(PETSC_USE_COMPLEX) 839 PetscReal *rwork; 840 #endif 841 PetscErrorCode ierr; 842 843 PetscFunctionBegin; 844 if (!sub_schurs->schur_explicit) { 845 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 846 } 847 848 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 849 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 850 } 851 852 if (pcbddc->dbg_flag) { 853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 854 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 855 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 856 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 857 } 858 859 if (pcbddc->dbg_flag) { 860 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 861 } 862 863 /* max size of subsets */ 864 mss = 0; 865 for (i=0;i<sub_schurs->n_subs;i++) { 866 PetscInt subset_size; 867 868 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 869 mss = PetscMax(mss,subset_size); 870 } 871 872 /* min/max and threshold */ 873 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 874 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 875 nmax = PetscMax(nmin,nmax); 876 allocated_S_St = PETSC_FALSE; 877 if (nmin) { 878 allocated_S_St = PETSC_TRUE; 879 } 880 881 /* allocate lapack workspace */ 882 cum = cum2 = 0; 883 maxneigs = 0; 884 for (i=0;i<sub_schurs->n_subs;i++) { 885 PetscInt n,subset_size; 886 887 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 888 n = PetscMin(subset_size,nmax); 889 cum += subset_size; 890 cum2 += subset_size*n; 891 maxneigs = PetscMax(maxneigs,n); 892 } 893 if (mss) { 894 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 895 PetscBLASInt B_itype = 1; 896 PetscBLASInt B_N = mss; 897 PetscReal zero = 0.0; 898 PetscReal eps = 0.0; /* dlamch? */ 899 900 B_lwork = -1; 901 S = NULL; 902 St = NULL; 903 eigs = NULL; 904 eigv = NULL; 905 B_iwork = NULL; 906 B_ifail = NULL; 907 #if defined(PETSC_USE_COMPLEX) 908 rwork = NULL; 909 #endif 910 thresh = 1.0; 911 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 912 #if defined(PETSC_USE_COMPLEX) 913 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 914 #else 915 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 916 #endif 917 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 919 } else { 920 /* TODO */ 921 } 922 } else { 923 lwork = 0; 924 } 925 926 nv = 0; 927 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 928 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 929 } 930 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 931 if (allocated_S_St) { 932 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 933 } 934 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 935 #if defined(PETSC_USE_COMPLEX) 936 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 937 #endif 938 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 939 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 940 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 941 nv+cum,&pcbddc->adaptive_constraints_idxs, 942 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 943 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 944 945 maxneigs = 0; 946 cum = cum2 = cumarray = 0; 947 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 948 pcbddc->adaptive_constraints_data_ptr[0] = 0; 949 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 950 const PetscInt *idxs; 951 952 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 953 for (cum=0;cum<nv;cum++) { 954 pcbddc->adaptive_constraints_n[cum] = 1; 955 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 956 pcbddc->adaptive_constraints_data[cum] = 1.0; 957 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 958 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 959 } 960 cum2 = cum; 961 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 962 } 963 964 if (mss) { /* multilevel */ 965 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 966 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 967 } 968 969 thresh = pcbddc->adaptive_threshold; 970 for (i=0;i<sub_schurs->n_subs;i++) { 971 const PetscInt *idxs; 972 PetscReal infty = PETSC_MAX_REAL; 973 PetscInt j,subset_size,eigs_start = 0; 974 PetscBLASInt B_N; 975 PetscBool same_data = PETSC_FALSE; 976 977 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 978 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 979 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 980 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 981 if (sub_schurs->is_hermitian) { 982 PetscInt j,k; 983 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 984 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 985 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 986 } 987 for (j=0;j<subset_size;j++) { 988 for (k=j;k<subset_size;k++) { 989 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 990 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 991 } 992 } 993 } else { 994 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 995 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 996 } 997 } else { 998 S = Sarray + cumarray; 999 St = Starray + cumarray; 1000 } 1001 1002 /* see if we can save some work */ 1003 if (sub_schurs->n_subs == 1) { 1004 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1005 } 1006 1007 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1008 B_neigs = 0; 1009 } else { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_IL, B_IU; 1013 PetscReal eps = -1.0; /* dlamch? */ 1014 PetscInt nmin_s; 1015 1016 if (pcbddc->dbg_flag) { 1017 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1018 } 1019 1020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1021 if (thresh > 1.+PETSC_SMALL) { 1022 1023 /* ask for eigenvalues larger than thresh */ 1024 #if defined(PETSC_USE_COMPLEX) 1025 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1026 #else 1027 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1028 #endif 1029 } else { 1030 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1031 B_IL = 1; 1032 #if defined(PETSC_USE_COMPLEX) 1033 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1034 #else 1035 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1036 #endif 1037 } 1038 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1039 if (B_ierr) { 1040 if (B_ierr < 0 ) { 1041 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1042 } else if (B_ierr <= B_N) { 1043 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1044 } else { 1045 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1046 } 1047 } 1048 1049 if (B_neigs > nmax) { 1050 if (pcbddc->dbg_flag) { 1051 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1052 } 1053 eigs_start = B_neigs -nmax; 1054 B_neigs = nmax; 1055 } 1056 1057 nmin_s = PetscMin(nmin,B_N); 1058 if (B_neigs < nmin_s) { 1059 PetscBLASInt B_neigs2; 1060 1061 B_IU = B_N - B_neigs; 1062 B_IL = B_N - nmin_s + 1; 1063 if (pcbddc->dbg_flag) { 1064 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1065 } 1066 if (sub_schurs->is_hermitian) { 1067 PetscInt j; 1068 for (j=0;j<subset_size;j++) { 1069 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 for (j=0;j<subset_size;j++) { 1072 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1073 } 1074 } else { 1075 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1076 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1077 } 1078 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1079 #if defined(PETSC_USE_COMPLEX) 1080 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1081 #else 1082 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1083 #endif 1084 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1085 B_neigs += B_neigs2; 1086 } 1087 if (B_ierr) { 1088 if (B_ierr < 0 ) { 1089 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1090 } else if (B_ierr <= B_N) { 1091 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1092 } else { 1093 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1094 } 1095 } 1096 if (pcbddc->dbg_flag) { 1097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1098 for (j=0;j<B_neigs;j++) { 1099 if (eigs[j] == 0.0) { 1100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1101 } else { 1102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1103 } 1104 } 1105 } 1106 } else { 1107 /* TODO */ 1108 } 1109 } 1110 maxneigs = PetscMax(B_neigs,maxneigs); 1111 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1112 if (B_neigs) { 1113 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1114 1115 if (pcbddc->dbg_flag > 1) { 1116 PetscInt ii; 1117 for (ii=0;ii<B_neigs;ii++) { 1118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1119 for (j=0;j<B_N;j++) { 1120 #if defined(PETSC_USE_COMPLEX) 1121 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1122 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1123 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1124 #else 1125 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1126 #endif 1127 } 1128 } 1129 } 1130 #if 0 1131 for (j=0;j<B_neigs;j++) { 1132 PetscBLASInt Blas_N,Blas_one = 1.0; 1133 PetscScalar norm; 1134 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1135 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1136 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1137 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1138 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1139 } else { 1140 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1141 } 1142 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1143 } 1144 #endif 1145 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1146 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1147 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1148 cum++; 1149 } 1150 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1151 /* shift for next computation */ 1152 cumarray += subset_size*subset_size; 1153 } 1154 if (pcbddc->dbg_flag) { 1155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1156 } 1157 1158 if (mss) { 1159 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1160 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1161 /* destroy matrices (junk) */ 1162 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1163 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1164 } 1165 if (allocated_S_St) { 1166 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1167 } 1168 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1169 #if defined(PETSC_USE_COMPLEX) 1170 ierr = PetscFree(rwork);CHKERRQ(ierr); 1171 #endif 1172 if (pcbddc->dbg_flag) { 1173 PetscInt maxneigs_r; 1174 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1175 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "PCBDDCSetUpSolvers" 1182 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1183 { 1184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1185 PetscScalar *coarse_submat_vals; 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1190 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1191 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1192 1193 /* Setup local neumann solver ksp_R */ 1194 /* PCBDDCSetUpLocalScatters should be called first! */ 1195 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1196 1197 /* Change global null space passed in by the user if change of basis has been requested */ 1198 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1199 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1200 } 1201 1202 /* 1203 Setup local correction and local part of coarse basis. 1204 Gives back the dense local part of the coarse matrix in column major ordering 1205 */ 1206 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1207 1208 /* Compute total number of coarse nodes and setup coarse solver */ 1209 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1210 1211 /* free */ 1212 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "PCBDDCResetCustomization" 1218 PetscErrorCode PCBDDCResetCustomization(PC pc) 1219 { 1220 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1225 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1226 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1227 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1228 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1229 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1231 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1232 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1233 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1234 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1235 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "PCBDDCResetTopography" 1241 PetscErrorCode PCBDDCResetTopography(PC pc) 1242 { 1243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1244 PetscInt i; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1249 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1250 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1251 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1252 for (i=0;i<pcbddc->n_local_subs;i++) { 1253 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1254 } 1255 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1256 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "PCBDDCResetSolvers" 1262 PetscErrorCode PCBDDCResetSolvers(PC pc) 1263 { 1264 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1269 if (pcbddc->coarse_phi_B) { 1270 PetscScalar *array; 1271 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1272 ierr = PetscFree(array);CHKERRQ(ierr); 1273 } 1274 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1275 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1276 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1277 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1278 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1279 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1280 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1281 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1282 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1283 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1284 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1285 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1286 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1287 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1288 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1289 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1290 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1291 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1292 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1293 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1294 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1295 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1296 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1297 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1298 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1299 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1300 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1301 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1302 if (pcbddc->benign_zerodiag_subs) { 1303 PetscInt i; 1304 for (i=0;i<pcbddc->benign_n;i++) { 1305 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1306 } 1307 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1308 } 1309 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1315 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1316 { 1317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1318 PC_IS *pcis = (PC_IS*)pc->data; 1319 VecType impVecType; 1320 PetscInt n_constraints,n_R,old_size; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 if (!pcbddc->ConstraintMatrix) { 1325 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1326 } 1327 /* get sizes */ 1328 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1329 n_R = pcis->n - pcbddc->n_vertices; 1330 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1331 /* local work vectors (try to avoid unneeded work)*/ 1332 /* R nodes */ 1333 old_size = -1; 1334 if (pcbddc->vec1_R) { 1335 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1336 } 1337 if (n_R != old_size) { 1338 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1339 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1340 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1341 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1342 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1343 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1344 } 1345 /* local primal dofs */ 1346 old_size = -1; 1347 if (pcbddc->vec1_P) { 1348 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1349 } 1350 if (pcbddc->local_primal_size != old_size) { 1351 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1352 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1353 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1354 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1355 } 1356 /* local explicit constraints */ 1357 old_size = -1; 1358 if (pcbddc->vec1_C) { 1359 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1360 } 1361 if (n_constraints && n_constraints != old_size) { 1362 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1363 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1364 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1365 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "PCBDDCSetUpCorrection" 1372 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1373 { 1374 PetscErrorCode ierr; 1375 /* pointers to pcis and pcbddc */ 1376 PC_IS* pcis = (PC_IS*)pc->data; 1377 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1378 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1379 /* submatrices of local problem */ 1380 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1381 /* submatrices of benign trick */ 1382 Mat B0_V = NULL; 1383 /* submatrices of local coarse problem */ 1384 Mat S_VV,S_CV,S_VC,S_CC; 1385 /* working matrices */ 1386 Mat C_CR; 1387 /* additional working stuff */ 1388 PC pc_R; 1389 Mat F; 1390 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1391 1392 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1393 PetscScalar *work; 1394 PetscInt *idx_V_B; 1395 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1396 PetscInt i,n_R,n_D,n_B; 1397 1398 /* some shortcuts to scalars */ 1399 PetscScalar one=1.0,m_one=-1.0; 1400 1401 PetscFunctionBegin; 1402 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1403 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1404 } 1405 1406 /* Set Non-overlapping dimensions */ 1407 n_vertices = pcbddc->n_vertices; 1408 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1409 n_B = pcis->n_B; 1410 n_D = pcis->n - n_B; 1411 n_R = pcis->n - n_vertices; 1412 1413 /* vertices in boundary numbering */ 1414 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1415 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1416 if (i != n_vertices) { 1417 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1418 } 1419 1420 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1421 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1422 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1423 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1424 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1425 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1427 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1428 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1429 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1430 1431 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1432 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1436 lda_rhs = n_R; 1437 need_benign_correction = PETSC_FALSE; 1438 if (isLU || isILU || isCHOL) { 1439 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1440 } else if (sub_schurs->reuse_solver) { 1441 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1442 MatFactorType type; 1443 1444 F = reuse_solver->F; 1445 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1446 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1447 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1448 need_benign_correction = !!reuse_solver->benign_n; 1449 } else { 1450 F = NULL; 1451 } 1452 1453 /* allocate workspace */ 1454 n = 0; 1455 if (n_constraints) { 1456 n += lda_rhs*n_constraints; 1457 } 1458 if (n_vertices) { 1459 n = PetscMax(2*lda_rhs*n_vertices,n); 1460 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1461 } 1462 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1463 1464 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1465 if (n_constraints) { 1466 Mat M1,M2,M3; 1467 Mat auxmat; 1468 IS is_aux; 1469 PetscScalar *array,*array2; 1470 1471 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1472 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1473 1474 /* Extract constraints on R nodes: C_{CR} */ 1475 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1476 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1477 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1478 1479 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1480 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1481 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1482 for (i=0;i<n_constraints;i++) { 1483 const PetscScalar *row_cmat_values; 1484 const PetscInt *row_cmat_indices; 1485 PetscInt size_of_constraint,j; 1486 1487 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1488 for (j=0;j<size_of_constraint;j++) { 1489 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1490 } 1491 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1492 } 1493 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1494 if (F) { 1495 Mat B; 1496 1497 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1498 if (need_benign_correction) { 1499 PetscScalar *marr; 1500 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1501 1502 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1503 for (i=0;i<n_constraints;i++) { 1504 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1505 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1506 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1507 } 1508 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1509 } 1510 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1511 if (need_benign_correction) { 1512 PetscScalar *marr; 1513 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1514 1515 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1516 for (i=0;i<n_constraints;i++) { 1517 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1518 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1519 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1520 } 1521 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1522 } 1523 ierr = MatDestroy(&B);CHKERRQ(ierr); 1524 } else { 1525 PetscScalar *marr; 1526 1527 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1528 for (i=0;i<n_constraints;i++) { 1529 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1530 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1531 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1532 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1533 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1534 } 1535 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1536 } 1537 if (!pcbddc->switch_static) { 1538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1539 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1540 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1541 for (i=0;i<n_constraints;i++) { 1542 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1543 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1544 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1547 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1548 } 1549 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1550 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1551 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1552 } else { 1553 if (lda_rhs != n_R) { 1554 IS dummy; 1555 1556 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1557 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1558 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1559 } else { 1560 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1561 pcbddc->local_auxmat2 = local_auxmat2_R; 1562 } 1563 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1564 } 1565 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1566 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1567 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1568 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1569 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1570 if (isCHOL) { 1571 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1574 } 1575 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1576 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1577 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1578 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1579 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1580 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1581 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1582 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1583 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1584 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1585 } 1586 /* Get submatrices from subdomain matrix */ 1587 if (pcbddc->benign_n) { 1588 IS dummy; 1589 Mat B0_R; 1590 PetscReal norm; 1591 1592 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1593 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1594 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1595 if (norm > PETSC_SMALL) { 1596 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1597 } 1598 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1599 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1600 } 1601 1602 if (n_vertices) { 1603 IS is_aux; 1604 1605 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1606 IS tis; 1607 1608 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1609 ierr = ISSort(tis);CHKERRQ(ierr); 1610 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1611 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1612 } else { 1613 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1614 } 1615 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1616 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1617 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1618 if (pcbddc->benign_n) { 1619 IS dummy; 1620 1621 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1622 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1623 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1624 } 1625 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1626 } 1627 1628 /* Matrix of coarse basis functions (local) */ 1629 if (pcbddc->coarse_phi_B) { 1630 PetscInt on_B,on_primal,on_D=n_D; 1631 if (pcbddc->coarse_phi_D) { 1632 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1633 } 1634 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1635 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1636 PetscScalar *marray; 1637 1638 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1639 ierr = PetscFree(marray);CHKERRQ(ierr); 1640 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1641 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1642 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1643 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1644 } 1645 } 1646 1647 if (!pcbddc->coarse_phi_B) { 1648 PetscScalar *marray; 1649 1650 n = n_B*pcbddc->local_primal_size; 1651 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1652 n += n_D*pcbddc->local_primal_size; 1653 } 1654 if (!pcbddc->symmetric_primal) { 1655 n *= 2; 1656 } 1657 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1658 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1659 n = n_B*pcbddc->local_primal_size; 1660 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1662 n += n_D*pcbddc->local_primal_size; 1663 } 1664 if (!pcbddc->symmetric_primal) { 1665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1666 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1667 n = n_B*pcbddc->local_primal_size; 1668 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1669 } 1670 } else { 1671 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1672 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1673 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1674 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1675 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1676 } 1677 } 1678 } 1679 1680 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1681 p0_lidx_I = NULL; 1682 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1683 const PetscInt *idxs; 1684 1685 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1687 for (i=0;i<pcbddc->benign_n;i++) { 1688 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1689 } 1690 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1691 } 1692 1693 /* vertices */ 1694 if (n_vertices) { 1695 1696 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1697 1698 if (n_R) { 1699 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1700 PetscBLASInt B_N,B_one = 1; 1701 PetscScalar *x,*y; 1702 PetscBool isseqaij; 1703 1704 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1705 if (need_benign_correction) { 1706 ISLocalToGlobalMapping RtoN; 1707 IS is_p0; 1708 PetscInt *idxs_p0,n; 1709 1710 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1711 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1712 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1713 if (n != pcbddc->benign_n) { 1714 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1715 } 1716 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1717 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1718 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1719 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1720 } 1721 1722 if (lda_rhs == n_R) { 1723 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1724 } else { 1725 PetscScalar *av,*array; 1726 const PetscInt *xadj,*adjncy; 1727 PetscInt n; 1728 PetscBool flg_row; 1729 1730 array = work+lda_rhs*n_vertices; 1731 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1732 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1733 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1734 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1735 for (i=0;i<n;i++) { 1736 PetscInt j; 1737 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1738 } 1739 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1740 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1742 } 1743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1744 if (F) { 1745 if (need_benign_correction) { 1746 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1747 PetscScalar *marr; 1748 1749 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1750 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1751 1752 | 0 0 0 | (V) 1753 L = | 0 0 -1 | (P-p0) 1754 | 0 0 -1 | (p0) 1755 1756 */ 1757 for (i=0;i<reuse_solver->benign_n;i++) { 1758 const PetscScalar *vals; 1759 const PetscInt *idxs,*idxs_zero; 1760 PetscInt n,j,nz; 1761 1762 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1763 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1764 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1765 for (j=0;j<n;j++) { 1766 PetscScalar val = vals[j]; 1767 PetscInt k,col = idxs[j]; 1768 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1769 } 1770 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1771 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1772 } 1773 /* need to correct the rhs */ 1774 for (i=0;i<n_vertices;i++) { 1775 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1776 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1777 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1778 } 1779 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1780 } 1781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1782 /* need to correct the solution */ 1783 if (need_benign_correction) { 1784 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1785 PetscScalar *marr; 1786 1787 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1788 for (i=0;i<n_vertices;i++) { 1789 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1790 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1791 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1792 } 1793 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1794 } 1795 } else { 1796 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1797 for (i=0;i<n_vertices;i++) { 1798 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1799 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1801 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1802 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1803 } 1804 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1805 } 1806 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1807 /* S_VV and S_CV */ 1808 if (n_constraints) { 1809 Mat B; 1810 1811 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1812 for (i=0;i<n_vertices;i++) { 1813 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1814 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1818 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1819 } 1820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1821 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1822 ierr = MatDestroy(&B);CHKERRQ(ierr); 1823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1824 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1825 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1826 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1828 ierr = MatDestroy(&B);CHKERRQ(ierr); 1829 } 1830 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1831 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1832 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1833 } 1834 if (lda_rhs != n_R) { 1835 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1837 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1838 } 1839 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1840 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1841 if (need_benign_correction) { 1842 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1843 PetscScalar *marr,*sums; 1844 1845 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1846 ierr = MatDenseGetArray(S_VVt,&marr); 1847 for (i=0;i<reuse_solver->benign_n;i++) { 1848 const PetscScalar *vals; 1849 const PetscInt *idxs,*idxs_zero; 1850 PetscInt n,j,nz; 1851 1852 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1853 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1854 for (j=0;j<n_vertices;j++) { 1855 PetscInt k; 1856 sums[j] = 0.; 1857 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1858 } 1859 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1860 for (j=0;j<n;j++) { 1861 PetscScalar val = vals[j]; 1862 PetscInt k; 1863 for (k=0;k<n_vertices;k++) { 1864 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1865 } 1866 } 1867 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1868 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1869 } 1870 ierr = PetscFree(sums);CHKERRQ(ierr); 1871 ierr = MatDenseRestoreArray(S_VVt,&marr); 1872 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1873 } 1874 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1876 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1877 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1878 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1879 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1880 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1881 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1882 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1883 } else { 1884 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1885 } 1886 for (i=0;i<pcbddc->benign_n;i++) { 1887 const PetscScalar *vals; 1888 const PetscInt *idxs; 1889 PetscInt n,j,primal_idx; 1890 1891 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1892 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1893 for (j=0;j<n;j++) { 1894 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1895 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1896 } 1897 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1898 } 1899 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1900 1901 /* coarse basis functions */ 1902 for (i=0;i<n_vertices;i++) { 1903 PetscScalar *y; 1904 1905 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1906 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1907 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1908 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1909 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1910 y[n_B*i+idx_V_B[i]] = 1.0; 1911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1912 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1913 1914 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1915 PetscInt j; 1916 1917 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1918 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1919 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1920 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1921 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1922 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1923 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1924 } 1925 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1926 } 1927 /* if n_R == 0 the object is not destroyed */ 1928 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1929 } 1930 1931 if (n_constraints) { 1932 Mat B; 1933 1934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1935 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1936 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1937 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1938 if (n_vertices) { 1939 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1940 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1941 } else { 1942 Mat S_VCt; 1943 1944 if (lda_rhs != n_R) { 1945 ierr = MatDestroy(&B);CHKERRQ(ierr); 1946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1947 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1948 } 1949 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1950 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1951 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1952 } 1953 } 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 /* coarse basis functions */ 1956 for (i=0;i<n_constraints;i++) { 1957 PetscScalar *y; 1958 1959 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1960 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1961 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1962 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1963 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1964 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1965 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1966 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1967 PetscInt j; 1968 1969 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1970 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1971 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1972 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1973 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1974 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1975 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1976 } 1977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1978 } 1979 } 1980 if (n_constraints) { 1981 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1982 } 1983 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1984 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1985 1986 /* compute other basis functions for non-symmetric problems */ 1987 if (!pcbddc->symmetric_primal) { 1988 Mat B_V=NULL,B_C=NULL; 1989 PetscScalar *marray; 1990 1991 if (n_constraints) { 1992 Mat S_CCT,C_CRT; 1993 1994 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1995 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1996 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1997 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1998 if (n_vertices) { 1999 Mat S_VCT; 2000 2001 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2002 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2003 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2004 } 2005 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2006 } 2007 if (n_vertices && n_R) { 2008 PetscScalar *av,*marray; 2009 const PetscInt *xadj,*adjncy; 2010 PetscInt n; 2011 PetscBool flg_row; 2012 2013 /* B_V = B_V - A_VR^T */ 2014 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2015 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2016 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2017 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2018 for (i=0;i<n;i++) { 2019 PetscInt j; 2020 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2021 } 2022 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2023 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2024 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2025 } 2026 2027 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2028 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2029 for (i=0;i<n_vertices;i++) { 2030 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2031 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2032 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2033 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2034 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2035 } 2036 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2037 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2038 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2039 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2040 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2041 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2042 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2043 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2044 } 2045 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2046 /* coarse basis functions */ 2047 for (i=0;i<pcbddc->local_primal_size;i++) { 2048 PetscScalar *y; 2049 2050 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2051 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2052 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2053 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2054 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2055 if (i<n_vertices) { 2056 y[n_B*i+idx_V_B[i]] = 1.0; 2057 } 2058 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2059 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2060 2061 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2062 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2063 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2064 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2067 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2068 } 2069 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2070 } 2071 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2072 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2073 } 2074 /* free memory */ 2075 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2076 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2077 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2078 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2079 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2080 ierr = PetscFree(work);CHKERRQ(ierr); 2081 if (n_vertices) { 2082 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2083 } 2084 if (n_constraints) { 2085 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2086 } 2087 /* Checking coarse_sub_mat and coarse basis functios */ 2088 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2089 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2090 if (pcbddc->dbg_flag) { 2091 Mat coarse_sub_mat; 2092 Mat AUXMAT,TM1,TM2,TM3,TM4; 2093 Mat coarse_phi_D,coarse_phi_B; 2094 Mat coarse_psi_D,coarse_psi_B; 2095 Mat A_II,A_BB,A_IB,A_BI; 2096 Mat C_B,CPHI; 2097 IS is_dummy; 2098 Vec mones; 2099 MatType checkmattype=MATSEQAIJ; 2100 PetscReal real_value; 2101 2102 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2103 Mat A; 2104 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2105 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2106 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2107 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2108 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2109 ierr = MatDestroy(&A);CHKERRQ(ierr); 2110 } else { 2111 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2112 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2113 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2114 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2115 } 2116 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2117 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2118 if (!pcbddc->symmetric_primal) { 2119 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2120 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2121 } 2122 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2123 2124 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2126 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2127 if (!pcbddc->symmetric_primal) { 2128 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2129 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2130 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2131 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2132 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2133 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2134 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2135 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2136 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2137 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2138 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2139 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2140 } else { 2141 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2142 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2143 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2144 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2145 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2146 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2147 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2148 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2149 } 2150 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2151 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2152 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2153 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2154 if (pcbddc->benign_n) { 2155 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2156 PetscScalar *data,*data2; 2157 PetscInt j; 2158 2159 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2160 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2161 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2162 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2163 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2164 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2165 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2166 for (j=0;j<pcbddc->benign_n;j++) { 2167 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2168 for (i=0;i<pcbddc->local_primal_size;i++) { 2169 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2170 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2171 } 2172 } 2173 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2174 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2175 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2176 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2177 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2178 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2179 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2180 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2181 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2182 } 2183 #if 0 2184 { 2185 PetscViewer viewer; 2186 char filename[256]; 2187 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2188 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2189 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2190 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2191 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2192 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2193 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2194 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2195 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2196 if (pcbddc->coarse_phi_D) { 2197 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2198 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2199 } 2200 if (pcbddc->coarse_psi_B) { 2201 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2202 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2203 } 2204 if (pcbddc->coarse_phi_B) { 2205 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2206 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2207 } 2208 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2209 } 2210 #endif 2211 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2212 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2213 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2215 2216 /* check constraints */ 2217 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2218 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2220 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2221 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2222 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2223 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2224 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2226 if (!pcbddc->symmetric_primal) { 2227 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2228 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2229 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2230 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2232 } 2233 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2234 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2235 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2236 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2237 } 2238 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2239 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2240 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2241 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2242 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2243 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2244 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2245 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2246 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2247 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2248 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2249 if (!pcbddc->symmetric_primal) { 2250 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2251 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2252 } 2253 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2254 } 2255 /* get back data */ 2256 *coarse_submat_vals_n = coarse_submat_vals; 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2262 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2263 { 2264 Mat *work_mat; 2265 IS isrow_s,iscol_s; 2266 PetscBool rsorted,csorted; 2267 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2268 PetscErrorCode ierr; 2269 2270 PetscFunctionBegin; 2271 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2272 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2273 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2274 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2275 2276 if (!rsorted) { 2277 const PetscInt *idxs; 2278 PetscInt *idxs_sorted,i; 2279 2280 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2281 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2282 for (i=0;i<rsize;i++) { 2283 idxs_perm_r[i] = i; 2284 } 2285 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2286 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2287 for (i=0;i<rsize;i++) { 2288 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2289 } 2290 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2291 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2292 } else { 2293 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2294 isrow_s = isrow; 2295 } 2296 2297 if (!csorted) { 2298 if (isrow == iscol) { 2299 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2300 iscol_s = isrow_s; 2301 } else { 2302 const PetscInt *idxs; 2303 PetscInt *idxs_sorted,i; 2304 2305 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2306 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2307 for (i=0;i<csize;i++) { 2308 idxs_perm_c[i] = i; 2309 } 2310 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2311 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2312 for (i=0;i<csize;i++) { 2313 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2314 } 2315 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2316 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2317 } 2318 } else { 2319 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2320 iscol_s = iscol; 2321 } 2322 2323 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2324 2325 if (!rsorted || !csorted) { 2326 Mat new_mat; 2327 IS is_perm_r,is_perm_c; 2328 2329 if (!rsorted) { 2330 PetscInt *idxs_r,i; 2331 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2332 for (i=0;i<rsize;i++) { 2333 idxs_r[idxs_perm_r[i]] = i; 2334 } 2335 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2336 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2337 } else { 2338 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2339 } 2340 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2341 2342 if (!csorted) { 2343 if (isrow_s == iscol_s) { 2344 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2345 is_perm_c = is_perm_r; 2346 } else { 2347 PetscInt *idxs_c,i; 2348 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2349 for (i=0;i<csize;i++) { 2350 idxs_c[idxs_perm_c[i]] = i; 2351 } 2352 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2353 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2354 } 2355 } else { 2356 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2357 } 2358 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2359 2360 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2361 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2362 work_mat[0] = new_mat; 2363 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2364 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2365 } 2366 2367 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2368 *B = work_mat[0]; 2369 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2370 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2371 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2372 PetscFunctionReturn(0); 2373 } 2374 2375 #undef __FUNCT__ 2376 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2377 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2378 { 2379 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2380 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2381 Mat new_mat; 2382 IS is_local,is_global; 2383 PetscInt local_size; 2384 PetscBool isseqaij; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2389 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2390 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2391 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2392 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2393 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2394 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2395 2396 /* check */ 2397 if (pcbddc->dbg_flag) { 2398 Vec x,x_change; 2399 PetscReal error; 2400 2401 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2402 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2403 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2404 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2405 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2406 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2407 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2408 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2409 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2410 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2413 ierr = VecDestroy(&x);CHKERRQ(ierr); 2414 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2415 } 2416 2417 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2418 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2419 if (isseqaij) { 2420 Mat M; 2421 2422 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2423 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2424 ierr = MatDestroy(&M);CHKERRQ(ierr); 2425 } else { 2426 Mat work_mat,M; 2427 2428 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2429 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2430 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2431 ierr = MatDestroy(&M);CHKERRQ(ierr); 2432 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2433 } 2434 if (matis->A->symmetric_set) { 2435 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2436 #if !defined(PETSC_USE_COMPLEX) 2437 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2438 #endif 2439 } 2440 /* 2441 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2442 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2443 */ 2444 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2450 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2451 { 2452 PC_IS* pcis = (PC_IS*)(pc->data); 2453 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2454 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2455 PetscInt *idx_R_local=NULL; 2456 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2457 PetscInt vbs,bs; 2458 PetscBT bitmask=NULL; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 /* 2463 No need to setup local scatters if 2464 - primal space is unchanged 2465 AND 2466 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2467 AND 2468 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2469 */ 2470 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2471 PetscFunctionReturn(0); 2472 } 2473 /* destroy old objects */ 2474 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2475 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2476 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2477 /* Set Non-overlapping dimensions */ 2478 n_B = pcis->n_B; 2479 n_D = pcis->n - n_B; 2480 n_vertices = pcbddc->n_vertices; 2481 2482 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2483 2484 /* create auxiliary bitmask and allocate workspace */ 2485 if (!sub_schurs->reuse_solver) { 2486 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2487 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2488 for (i=0;i<n_vertices;i++) { 2489 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2490 } 2491 2492 for (i=0, n_R=0; i<pcis->n; i++) { 2493 if (!PetscBTLookup(bitmask,i)) { 2494 idx_R_local[n_R++] = i; 2495 } 2496 } 2497 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2498 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2499 2500 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2501 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2502 } 2503 2504 /* Block code */ 2505 vbs = 1; 2506 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2507 if (bs>1 && !(n_vertices%bs)) { 2508 PetscBool is_blocked = PETSC_TRUE; 2509 PetscInt *vary; 2510 if (!sub_schurs->reuse_solver) { 2511 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2512 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2513 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2514 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2515 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2516 for (i=0; i<pcis->n/bs; i++) { 2517 if (vary[i]!=0 && vary[i]!=bs) { 2518 is_blocked = PETSC_FALSE; 2519 break; 2520 } 2521 } 2522 ierr = PetscFree(vary);CHKERRQ(ierr); 2523 } else { 2524 /* Verify directly the R set */ 2525 for (i=0; i<n_R/bs; i++) { 2526 PetscInt j,node=idx_R_local[bs*i]; 2527 for (j=1; j<bs; j++) { 2528 if (node != idx_R_local[bs*i+j]-j) { 2529 is_blocked = PETSC_FALSE; 2530 break; 2531 } 2532 } 2533 } 2534 } 2535 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2536 vbs = bs; 2537 for (i=0;i<n_R/vbs;i++) { 2538 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2539 } 2540 } 2541 } 2542 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2543 if (sub_schurs->reuse_solver) { 2544 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2545 2546 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2547 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2548 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2549 reuse_solver->is_R = pcbddc->is_R_local; 2550 } else { 2551 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2552 } 2553 2554 /* print some info if requested */ 2555 if (pcbddc->dbg_flag) { 2556 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2557 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2558 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2562 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2563 } 2564 2565 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2566 if (!sub_schurs->reuse_solver) { 2567 IS is_aux1,is_aux2; 2568 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2569 2570 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2571 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2572 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2573 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2574 for (i=0; i<n_D; i++) { 2575 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2576 } 2577 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2578 for (i=0, j=0; i<n_R; i++) { 2579 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2580 aux_array1[j++] = i; 2581 } 2582 } 2583 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2584 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2585 for (i=0, j=0; i<n_B; i++) { 2586 if (!PetscBTLookup(bitmask,is_indices[i])) { 2587 aux_array2[j++] = i; 2588 } 2589 } 2590 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2591 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2592 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2593 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2594 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2595 2596 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2597 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2598 for (i=0, j=0; i<n_R; i++) { 2599 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2600 aux_array1[j++] = i; 2601 } 2602 } 2603 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2604 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2605 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2606 } 2607 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2608 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2609 } else { 2610 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2611 IS tis; 2612 PetscInt schur_size; 2613 2614 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2615 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2616 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2617 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2619 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2620 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2621 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2622 } 2623 } 2624 PetscFunctionReturn(0); 2625 } 2626 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2630 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2631 { 2632 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2633 PC_IS *pcis = (PC_IS*)pc->data; 2634 PC pc_temp; 2635 Mat A_RR; 2636 MatReuse reuse; 2637 PetscScalar m_one = -1.0; 2638 PetscReal value; 2639 PetscInt n_D,n_R; 2640 PetscBool use_exact,use_exact_reduced,issbaij; 2641 PetscErrorCode ierr; 2642 /* prefixes stuff */ 2643 char dir_prefix[256],neu_prefix[256],str_level[16]; 2644 size_t len; 2645 2646 PetscFunctionBegin; 2647 2648 /* compute prefixes */ 2649 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2650 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2651 if (!pcbddc->current_level) { 2652 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2653 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2654 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2655 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2656 } else { 2657 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2658 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2659 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2660 len -= 15; /* remove "pc_bddc_coarse_" */ 2661 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2662 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2663 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2664 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2665 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2666 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2667 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2668 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2669 } 2670 2671 /* DIRICHLET PROBLEM */ 2672 if (dirichlet) { 2673 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2674 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2675 Mat A_IIn; 2676 2677 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2678 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2679 pcis->A_II = A_IIn; 2680 } 2681 if (pcbddc->local_mat->symmetric_set) { 2682 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2683 } 2684 /* Matrix for Dirichlet problem is pcis->A_II */ 2685 n_D = pcis->n - pcis->n_B; 2686 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2687 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2688 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2689 /* default */ 2690 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2691 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2692 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2693 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2694 if (issbaij) { 2695 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2696 } else { 2697 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2698 } 2699 /* Allow user's customization */ 2700 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2701 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2702 } 2703 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2704 if (sub_schurs->reuse_solver) { 2705 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2706 2707 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2708 } 2709 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2710 if (!n_D) { 2711 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2712 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2713 } 2714 /* Set Up KSP for Dirichlet problem of BDDC */ 2715 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2716 /* set ksp_D into pcis data */ 2717 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2718 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2719 pcis->ksp_D = pcbddc->ksp_D; 2720 } 2721 2722 /* NEUMANN PROBLEM */ 2723 A_RR = 0; 2724 if (neumann) { 2725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2726 PetscInt ibs,mbs; 2727 PetscBool issbaij; 2728 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2729 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2730 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2731 if (pcbddc->ksp_R) { /* already created ksp */ 2732 PetscInt nn_R; 2733 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2734 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2735 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2736 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2737 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2738 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2739 reuse = MAT_INITIAL_MATRIX; 2740 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2741 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2742 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2743 reuse = MAT_INITIAL_MATRIX; 2744 } else { /* safe to reuse the matrix */ 2745 reuse = MAT_REUSE_MATRIX; 2746 } 2747 } 2748 /* last check */ 2749 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2750 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2751 reuse = MAT_INITIAL_MATRIX; 2752 } 2753 } else { /* first time, so we need to create the matrix */ 2754 reuse = MAT_INITIAL_MATRIX; 2755 } 2756 /* extract A_RR */ 2757 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2758 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2759 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2760 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2761 if (matis->A == pcbddc->local_mat) { 2762 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2763 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2764 } else { 2765 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2766 } 2767 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2768 if (matis->A == pcbddc->local_mat) { 2769 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2770 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2771 } else { 2772 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2773 } 2774 } 2775 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2776 if (pcbddc->local_mat->symmetric_set) { 2777 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2778 } 2779 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2780 Mat A_RRn; 2781 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2782 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2783 A_RR = A_RRn; 2784 } 2785 if (!pcbddc->ksp_R) { /* create object if not present */ 2786 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2787 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2788 /* default */ 2789 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2790 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2791 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2792 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2793 if (issbaij) { 2794 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2795 } else { 2796 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2797 } 2798 /* Allow user's customization */ 2799 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2800 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2801 } 2802 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2803 if (!n_R) { 2804 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2805 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2806 } 2807 /* Reuse solver if it is present */ 2808 if (sub_schurs->reuse_solver) { 2809 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2810 2811 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2812 } 2813 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2814 /* Set Up KSP for Neumann problem of BDDC */ 2815 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2816 } 2817 /* free Neumann problem's matrix */ 2818 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2819 2820 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2821 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2822 if (pcbddc->dbg_flag) { 2823 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2824 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2825 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2826 } 2827 if (dirichlet) { /* Dirichlet */ 2828 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2829 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2830 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2831 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2832 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2833 /* need to be adapted? */ 2834 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2835 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2836 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2837 /* print info */ 2838 if (pcbddc->dbg_flag) { 2839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2840 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2841 } 2842 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2843 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2844 } 2845 } 2846 if (neumann) { /* Neumann */ 2847 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2848 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2849 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2850 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2851 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2852 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2853 /* need to be adapted? */ 2854 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2855 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2856 /* print info */ 2857 if (pcbddc->dbg_flag) { 2858 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2860 } 2861 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2862 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2863 } 2864 } 2865 } 2866 PetscFunctionReturn(0); 2867 } 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2871 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2872 { 2873 PetscErrorCode ierr; 2874 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2875 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2876 2877 PetscFunctionBegin; 2878 if (!sub_schurs->reuse_solver) { 2879 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2880 } 2881 if (!pcbddc->switch_static) { 2882 if (applytranspose && pcbddc->local_auxmat1) { 2883 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2884 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2885 } 2886 if (!sub_schurs->reuse_solver) { 2887 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2888 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2889 } else { 2890 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2891 2892 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 } 2895 } else { 2896 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2898 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2899 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2900 if (applytranspose && pcbddc->local_auxmat1) { 2901 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2902 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2903 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2904 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2905 } 2906 } 2907 if (!sub_schurs->reuse_solver) { 2908 if (applytranspose) { 2909 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2910 } else { 2911 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2912 } 2913 } else { 2914 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2915 2916 if (applytranspose) { 2917 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2918 } else { 2919 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2920 } 2921 } 2922 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2923 if (!pcbddc->switch_static) { 2924 if (!sub_schurs->reuse_solver) { 2925 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2926 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2927 } else { 2928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2929 2930 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2931 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2932 } 2933 if (!applytranspose && pcbddc->local_auxmat1) { 2934 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2935 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2936 } 2937 } else { 2938 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2940 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2941 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2942 if (!applytranspose && pcbddc->local_auxmat1) { 2943 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2944 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2945 } 2946 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2947 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2948 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950 } 2951 PetscFunctionReturn(0); 2952 } 2953 2954 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2957 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2958 { 2959 PetscErrorCode ierr; 2960 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2961 PC_IS* pcis = (PC_IS*) (pc->data); 2962 const PetscScalar zero = 0.0; 2963 2964 PetscFunctionBegin; 2965 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2966 if (applytranspose) { 2967 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2968 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2969 } else { 2970 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2971 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2972 } 2973 2974 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2975 if (pcbddc->benign_n) { 2976 PetscScalar *array; 2977 PetscInt j; 2978 2979 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2980 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2981 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2982 } 2983 2984 /* start communications from local primal nodes to rhs of coarse solver */ 2985 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2986 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2988 2989 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2990 /* TODO remove null space when doing multilevel */ 2991 if (pcbddc->coarse_ksp) { 2992 Mat coarse_mat; 2993 Vec rhs,sol; 2994 MatNullSpace nullsp; 2995 2996 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2997 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2998 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2999 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3000 if (nullsp) { 3001 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3002 } 3003 if (applytranspose) { 3004 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3005 } else { 3006 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3007 } 3008 if (nullsp) { 3009 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3010 } 3011 } 3012 3013 /* Local solution on R nodes */ 3014 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 3015 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3016 } 3017 3018 /* communications from coarse sol to local primal nodes */ 3019 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3020 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3021 3022 /* Sum contributions from two levels */ 3023 if (applytranspose) { 3024 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3025 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3026 } else { 3027 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3028 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3029 } 3030 /* store p0 */ 3031 if (pcbddc->benign_n) { 3032 PetscScalar *array; 3033 PetscInt j; 3034 3035 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3036 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3037 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3038 } 3039 PetscFunctionReturn(0); 3040 } 3041 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3044 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3045 { 3046 PetscErrorCode ierr; 3047 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3048 PetscScalar *array; 3049 Vec from,to; 3050 3051 PetscFunctionBegin; 3052 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3053 from = pcbddc->coarse_vec; 3054 to = pcbddc->vec1_P; 3055 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3056 Vec tvec; 3057 3058 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3059 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3060 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3061 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3062 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3063 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3064 } 3065 } else { /* from local to global -> put data in coarse right hand side */ 3066 from = pcbddc->vec1_P; 3067 to = pcbddc->coarse_vec; 3068 } 3069 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3075 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3076 { 3077 PetscErrorCode ierr; 3078 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3079 PetscScalar *array; 3080 Vec from,to; 3081 3082 PetscFunctionBegin; 3083 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3084 from = pcbddc->coarse_vec; 3085 to = pcbddc->vec1_P; 3086 } else { /* from local to global -> put data in coarse right hand side */ 3087 from = pcbddc->vec1_P; 3088 to = pcbddc->coarse_vec; 3089 } 3090 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3091 if (smode == SCATTER_FORWARD) { 3092 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3093 Vec tvec; 3094 3095 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3096 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3097 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3098 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3099 } 3100 } else { 3101 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3102 ierr = VecResetArray(from);CHKERRQ(ierr); 3103 } 3104 } 3105 PetscFunctionReturn(0); 3106 } 3107 3108 /* uncomment for testing purposes */ 3109 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3112 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3113 { 3114 PetscErrorCode ierr; 3115 PC_IS* pcis = (PC_IS*)(pc->data); 3116 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3117 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3118 /* one and zero */ 3119 PetscScalar one=1.0,zero=0.0; 3120 /* space to store constraints and their local indices */ 3121 PetscScalar *constraints_data; 3122 PetscInt *constraints_idxs,*constraints_idxs_B; 3123 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3124 PetscInt *constraints_n; 3125 /* iterators */ 3126 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3127 /* BLAS integers */ 3128 PetscBLASInt lwork,lierr; 3129 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3130 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3131 /* reuse */ 3132 PetscInt olocal_primal_size,olocal_primal_size_cc; 3133 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3134 /* change of basis */ 3135 PetscBool qr_needed; 3136 PetscBT change_basis,qr_needed_idx; 3137 /* auxiliary stuff */ 3138 PetscInt *nnz,*is_indices; 3139 PetscInt ncc; 3140 /* some quantities */ 3141 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3142 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3143 3144 PetscFunctionBegin; 3145 /* Destroy Mat objects computed previously */ 3146 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3147 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3148 /* save info on constraints from previous setup (if any) */ 3149 olocal_primal_size = pcbddc->local_primal_size; 3150 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3151 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3152 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3153 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3154 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3155 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3156 3157 /* print some info */ 3158 if (pcbddc->dbg_flag) { 3159 IS vertices; 3160 PetscInt nv,nedges,nfaces; 3161 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3162 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3163 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3164 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3170 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3171 } 3172 3173 if (!pcbddc->adaptive_selection) { 3174 IS ISForVertices,*ISForFaces,*ISForEdges; 3175 MatNullSpace nearnullsp; 3176 const Vec *nearnullvecs; 3177 Vec *localnearnullsp; 3178 PetscScalar *array; 3179 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3180 PetscBool nnsp_has_cnst; 3181 /* LAPACK working arrays for SVD or POD */ 3182 PetscBool skip_lapack,boolforchange; 3183 PetscScalar *work; 3184 PetscReal *singular_vals; 3185 #if defined(PETSC_USE_COMPLEX) 3186 PetscReal *rwork; 3187 #endif 3188 #if defined(PETSC_MISSING_LAPACK_GESVD) 3189 PetscScalar *temp_basis,*correlation_mat; 3190 #else 3191 PetscBLASInt dummy_int=1; 3192 PetscScalar dummy_scalar=1.; 3193 #endif 3194 3195 /* Get index sets for faces, edges and vertices from graph */ 3196 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3197 /* free unneeded index sets */ 3198 if (!pcbddc->use_vertices) { 3199 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3200 } 3201 if (!pcbddc->use_edges) { 3202 for (i=0;i<n_ISForEdges;i++) { 3203 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3204 } 3205 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3206 n_ISForEdges = 0; 3207 } 3208 if (!pcbddc->use_faces) { 3209 for (i=0;i<n_ISForFaces;i++) { 3210 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3211 } 3212 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3213 n_ISForFaces = 0; 3214 } 3215 3216 #if defined(PETSC_USE_DEBUG) 3217 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3218 Also use_change_of_basis should be consistent among processors */ 3219 if (pcbddc->NullSpace) { 3220 PetscBool tbool[2],gbool[2]; 3221 3222 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3223 pcbddc->use_change_of_basis = PETSC_TRUE; 3224 if (!ISForEdges) { 3225 pcbddc->use_change_on_faces = PETSC_TRUE; 3226 } 3227 } 3228 tbool[0] = pcbddc->use_change_of_basis; 3229 tbool[1] = pcbddc->use_change_on_faces; 3230 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3231 pcbddc->use_change_of_basis = gbool[0]; 3232 pcbddc->use_change_on_faces = gbool[1]; 3233 } 3234 #endif 3235 3236 /* check if near null space is attached to global mat */ 3237 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3238 if (nearnullsp) { 3239 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3240 /* remove any stored info */ 3241 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3242 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3243 /* store information for BDDC solver reuse */ 3244 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3245 pcbddc->onearnullspace = nearnullsp; 3246 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3247 for (i=0;i<nnsp_size;i++) { 3248 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3249 } 3250 } else { /* if near null space is not provided BDDC uses constants by default */ 3251 nnsp_size = 0; 3252 nnsp_has_cnst = PETSC_TRUE; 3253 } 3254 /* get max number of constraints on a single cc */ 3255 max_constraints = nnsp_size; 3256 if (nnsp_has_cnst) max_constraints++; 3257 3258 /* 3259 Evaluate maximum storage size needed by the procedure 3260 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3261 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3262 There can be multiple constraints per connected component 3263 */ 3264 n_vertices = 0; 3265 if (ISForVertices) { 3266 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3267 } 3268 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3269 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3270 3271 total_counts = n_ISForFaces+n_ISForEdges; 3272 total_counts *= max_constraints; 3273 total_counts += n_vertices; 3274 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3275 3276 total_counts = 0; 3277 max_size_of_constraint = 0; 3278 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3279 IS used_is; 3280 if (i<n_ISForEdges) { 3281 used_is = ISForEdges[i]; 3282 } else { 3283 used_is = ISForFaces[i-n_ISForEdges]; 3284 } 3285 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3286 total_counts += j; 3287 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3288 } 3289 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3290 3291 /* get local part of global near null space vectors */ 3292 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3293 for (k=0;k<nnsp_size;k++) { 3294 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3295 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3296 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3297 } 3298 3299 /* whether or not to skip lapack calls */ 3300 skip_lapack = PETSC_TRUE; 3301 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3302 3303 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3304 if (!skip_lapack) { 3305 PetscScalar temp_work; 3306 3307 #if defined(PETSC_MISSING_LAPACK_GESVD) 3308 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3309 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3310 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3311 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3312 #if defined(PETSC_USE_COMPLEX) 3313 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3314 #endif 3315 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3316 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3317 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3318 lwork = -1; 3319 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3320 #if !defined(PETSC_USE_COMPLEX) 3321 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3322 #else 3323 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3324 #endif 3325 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3326 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3327 #else /* on missing GESVD */ 3328 /* SVD */ 3329 PetscInt max_n,min_n; 3330 max_n = max_size_of_constraint; 3331 min_n = max_constraints; 3332 if (max_size_of_constraint < max_constraints) { 3333 min_n = max_size_of_constraint; 3334 max_n = max_constraints; 3335 } 3336 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3337 #if defined(PETSC_USE_COMPLEX) 3338 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3339 #endif 3340 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3341 lwork = -1; 3342 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3343 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3344 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3345 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3346 #if !defined(PETSC_USE_COMPLEX) 3347 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3348 #else 3349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3350 #endif 3351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3352 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3353 #endif /* on missing GESVD */ 3354 /* Allocate optimal workspace */ 3355 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3356 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3357 } 3358 /* Now we can loop on constraining sets */ 3359 total_counts = 0; 3360 constraints_idxs_ptr[0] = 0; 3361 constraints_data_ptr[0] = 0; 3362 /* vertices */ 3363 if (n_vertices) { 3364 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3365 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3366 for (i=0;i<n_vertices;i++) { 3367 constraints_n[total_counts] = 1; 3368 constraints_data[total_counts] = 1.0; 3369 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3370 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3371 total_counts++; 3372 } 3373 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3374 n_vertices = total_counts; 3375 } 3376 3377 /* edges and faces */ 3378 total_counts_cc = total_counts; 3379 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3380 IS used_is; 3381 PetscBool idxs_copied = PETSC_FALSE; 3382 3383 if (ncc<n_ISForEdges) { 3384 used_is = ISForEdges[ncc]; 3385 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3386 } else { 3387 used_is = ISForFaces[ncc-n_ISForEdges]; 3388 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3389 } 3390 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3391 3392 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3393 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3394 /* change of basis should not be performed on local periodic nodes */ 3395 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3396 if (nnsp_has_cnst) { 3397 PetscScalar quad_value; 3398 3399 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3400 idxs_copied = PETSC_TRUE; 3401 3402 if (!pcbddc->use_nnsp_true) { 3403 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3404 } else { 3405 quad_value = 1.0; 3406 } 3407 for (j=0;j<size_of_constraint;j++) { 3408 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3409 } 3410 temp_constraints++; 3411 total_counts++; 3412 } 3413 for (k=0;k<nnsp_size;k++) { 3414 PetscReal real_value; 3415 PetscScalar *ptr_to_data; 3416 3417 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3418 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3419 for (j=0;j<size_of_constraint;j++) { 3420 ptr_to_data[j] = array[is_indices[j]]; 3421 } 3422 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3423 /* check if array is null on the connected component */ 3424 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3425 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3426 if (real_value > 0.0) { /* keep indices and values */ 3427 temp_constraints++; 3428 total_counts++; 3429 if (!idxs_copied) { 3430 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3431 idxs_copied = PETSC_TRUE; 3432 } 3433 } 3434 } 3435 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3436 valid_constraints = temp_constraints; 3437 if (!pcbddc->use_nnsp_true && temp_constraints) { 3438 if (temp_constraints == 1) { /* just normalize the constraint */ 3439 PetscScalar norm,*ptr_to_data; 3440 3441 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3442 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3443 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3444 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3445 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3446 } else { /* perform SVD */ 3447 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3448 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3449 3450 #if defined(PETSC_MISSING_LAPACK_GESVD) 3451 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3452 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3453 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3454 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3455 from that computed using LAPACKgesvd 3456 -> This is due to a different computation of eigenvectors in LAPACKheev 3457 -> The quality of the POD-computed basis will be the same */ 3458 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3459 /* Store upper triangular part of correlation matrix */ 3460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3462 for (j=0;j<temp_constraints;j++) { 3463 for (k=0;k<j+1;k++) { 3464 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3465 } 3466 } 3467 /* compute eigenvalues and eigenvectors of correlation matrix */ 3468 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3469 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3470 #if !defined(PETSC_USE_COMPLEX) 3471 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3472 #else 3473 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3474 #endif 3475 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3477 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3478 j = 0; 3479 while (j < temp_constraints && singular_vals[j] < tol) j++; 3480 total_counts = total_counts-j; 3481 valid_constraints = temp_constraints-j; 3482 /* scale and copy POD basis into used quadrature memory */ 3483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3484 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3485 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3487 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3489 if (j<temp_constraints) { 3490 PetscInt ii; 3491 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3492 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3493 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3494 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3495 for (k=0;k<temp_constraints-j;k++) { 3496 for (ii=0;ii<size_of_constraint;ii++) { 3497 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3498 } 3499 } 3500 } 3501 #else /* on missing GESVD */ 3502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3503 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3505 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3506 #if !defined(PETSC_USE_COMPLEX) 3507 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3508 #else 3509 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3510 #endif 3511 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3512 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3513 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3514 k = temp_constraints; 3515 if (k > size_of_constraint) k = size_of_constraint; 3516 j = 0; 3517 while (j < k && singular_vals[k-j-1] < tol) j++; 3518 valid_constraints = k-j; 3519 total_counts = total_counts-temp_constraints+valid_constraints; 3520 #endif /* on missing GESVD */ 3521 } 3522 } 3523 /* update pointers information */ 3524 if (valid_constraints) { 3525 constraints_n[total_counts_cc] = valid_constraints; 3526 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3527 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3528 /* set change_of_basis flag */ 3529 if (boolforchange) { 3530 PetscBTSet(change_basis,total_counts_cc); 3531 } 3532 total_counts_cc++; 3533 } 3534 } 3535 /* free workspace */ 3536 if (!skip_lapack) { 3537 ierr = PetscFree(work);CHKERRQ(ierr); 3538 #if defined(PETSC_USE_COMPLEX) 3539 ierr = PetscFree(rwork);CHKERRQ(ierr); 3540 #endif 3541 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3542 #if defined(PETSC_MISSING_LAPACK_GESVD) 3543 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3544 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3545 #endif 3546 } 3547 for (k=0;k<nnsp_size;k++) { 3548 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3549 } 3550 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3551 /* free index sets of faces, edges and vertices */ 3552 for (i=0;i<n_ISForFaces;i++) { 3553 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3554 } 3555 if (n_ISForFaces) { 3556 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3557 } 3558 for (i=0;i<n_ISForEdges;i++) { 3559 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3560 } 3561 if (n_ISForEdges) { 3562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3563 } 3564 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3565 } else { 3566 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3567 3568 total_counts = 0; 3569 n_vertices = 0; 3570 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3571 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3572 } 3573 max_constraints = 0; 3574 total_counts_cc = 0; 3575 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3576 total_counts += pcbddc->adaptive_constraints_n[i]; 3577 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3578 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3579 } 3580 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3581 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3582 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3583 constraints_data = pcbddc->adaptive_constraints_data; 3584 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3585 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3586 total_counts_cc = 0; 3587 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3588 if (pcbddc->adaptive_constraints_n[i]) { 3589 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3590 } 3591 } 3592 #if 0 3593 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3594 for (i=0;i<total_counts_cc;i++) { 3595 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3596 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3597 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3598 printf(" %d",constraints_idxs[j]); 3599 } 3600 printf("\n"); 3601 printf("number of cc: %d\n",constraints_n[i]); 3602 } 3603 for (i=0;i<n_vertices;i++) { 3604 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3605 } 3606 for (i=0;i<sub_schurs->n_subs;i++) { 3607 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3608 } 3609 #endif 3610 3611 max_size_of_constraint = 0; 3612 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3613 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3614 /* Change of basis */ 3615 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3616 if (pcbddc->use_change_of_basis) { 3617 for (i=0;i<sub_schurs->n_subs;i++) { 3618 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3619 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3620 } 3621 } 3622 } 3623 } 3624 pcbddc->local_primal_size = total_counts; 3625 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3626 3627 /* map constraints_idxs in boundary numbering */ 3628 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3629 if (i != constraints_idxs_ptr[total_counts_cc]) { 3630 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3631 } 3632 3633 /* Create constraint matrix */ 3634 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3635 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3636 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3637 3638 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3639 /* determine if a QR strategy is needed for change of basis */ 3640 qr_needed = PETSC_FALSE; 3641 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3642 total_primal_vertices=0; 3643 pcbddc->local_primal_size_cc = 0; 3644 for (i=0;i<total_counts_cc;i++) { 3645 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3646 if (size_of_constraint == 1) { 3647 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3648 pcbddc->local_primal_size_cc += 1; 3649 } else if (PetscBTLookup(change_basis,i)) { 3650 for (k=0;k<constraints_n[i];k++) { 3651 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3652 } 3653 pcbddc->local_primal_size_cc += constraints_n[i]; 3654 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3655 PetscBTSet(qr_needed_idx,i); 3656 qr_needed = PETSC_TRUE; 3657 } 3658 } else { 3659 pcbddc->local_primal_size_cc += 1; 3660 } 3661 } 3662 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3663 pcbddc->n_vertices = total_primal_vertices; 3664 /* permute indices in order to have a sorted set of vertices */ 3665 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3666 3667 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3668 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3669 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3670 3671 /* nonzero structure of constraint matrix */ 3672 /* and get reference dof for local constraints */ 3673 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3674 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3675 3676 j = total_primal_vertices; 3677 total_counts = total_primal_vertices; 3678 cum = total_primal_vertices; 3679 for (i=n_vertices;i<total_counts_cc;i++) { 3680 if (!PetscBTLookup(change_basis,i)) { 3681 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3682 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3683 cum++; 3684 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3685 for (k=0;k<constraints_n[i];k++) { 3686 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3687 nnz[j+k] = size_of_constraint; 3688 } 3689 j += constraints_n[i]; 3690 } 3691 } 3692 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3693 ierr = PetscFree(nnz);CHKERRQ(ierr); 3694 3695 /* set values in constraint matrix */ 3696 for (i=0;i<total_primal_vertices;i++) { 3697 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3698 } 3699 total_counts = total_primal_vertices; 3700 for (i=n_vertices;i<total_counts_cc;i++) { 3701 if (!PetscBTLookup(change_basis,i)) { 3702 PetscInt *cols; 3703 3704 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3705 cols = constraints_idxs+constraints_idxs_ptr[i]; 3706 for (k=0;k<constraints_n[i];k++) { 3707 PetscInt row = total_counts+k; 3708 PetscScalar *vals; 3709 3710 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3711 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3712 } 3713 total_counts += constraints_n[i]; 3714 } 3715 } 3716 /* assembling */ 3717 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3718 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3719 3720 /* 3721 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3722 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3723 */ 3724 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3725 if (pcbddc->use_change_of_basis) { 3726 /* dual and primal dofs on a single cc */ 3727 PetscInt dual_dofs,primal_dofs; 3728 /* working stuff for GEQRF */ 3729 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3730 PetscBLASInt lqr_work; 3731 /* working stuff for UNGQR */ 3732 PetscScalar *gqr_work,lgqr_work_t; 3733 PetscBLASInt lgqr_work; 3734 /* working stuff for TRTRS */ 3735 PetscScalar *trs_rhs; 3736 PetscBLASInt Blas_NRHS; 3737 /* pointers for values insertion into change of basis matrix */ 3738 PetscInt *start_rows,*start_cols; 3739 PetscScalar *start_vals; 3740 /* working stuff for values insertion */ 3741 PetscBT is_primal; 3742 PetscInt *aux_primal_numbering_B; 3743 /* matrix sizes */ 3744 PetscInt global_size,local_size; 3745 /* temporary change of basis */ 3746 Mat localChangeOfBasisMatrix; 3747 /* extra space for debugging */ 3748 PetscScalar *dbg_work; 3749 3750 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3751 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3752 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3753 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3754 /* nonzeros for local mat */ 3755 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3756 for (i=0;i<pcis->n;i++) nnz[i]=1; 3757 for (i=n_vertices;i<total_counts_cc;i++) { 3758 if (PetscBTLookup(change_basis,i)) { 3759 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3760 if (PetscBTLookup(qr_needed_idx,i)) { 3761 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3762 } else { 3763 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3764 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3765 } 3766 } 3767 } 3768 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3769 ierr = PetscFree(nnz);CHKERRQ(ierr); 3770 /* Set initial identity in the matrix */ 3771 for (i=0;i<pcis->n;i++) { 3772 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3773 } 3774 3775 if (pcbddc->dbg_flag) { 3776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3778 } 3779 3780 3781 /* Now we loop on the constraints which need a change of basis */ 3782 /* 3783 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3784 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3785 3786 Basic blocks of change of basis matrix T computed by 3787 3788 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3789 3790 | 1 0 ... 0 s_1/S | 3791 | 0 1 ... 0 s_2/S | 3792 | ... | 3793 | 0 ... 1 s_{n-1}/S | 3794 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3795 3796 with S = \sum_{i=1}^n s_i^2 3797 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3798 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3799 3800 - QR decomposition of constraints otherwise 3801 */ 3802 if (qr_needed) { 3803 /* space to store Q */ 3804 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3805 /* first we issue queries for optimal work */ 3806 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3807 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3808 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3809 lqr_work = -1; 3810 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3811 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3812 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3813 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3814 lgqr_work = -1; 3815 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3816 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3817 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3818 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3819 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3820 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3821 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3822 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3823 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3824 /* array to store scaling factors for reflectors */ 3825 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3826 /* array to store rhs and solution of triangular solver */ 3827 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3828 /* allocating workspace for check */ 3829 if (pcbddc->dbg_flag) { 3830 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3831 } 3832 } 3833 /* array to store whether a node is primal or not */ 3834 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3835 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3836 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3837 if (i != total_primal_vertices) { 3838 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3839 } 3840 for (i=0;i<total_primal_vertices;i++) { 3841 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3842 } 3843 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3844 3845 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3846 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3847 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3848 if (PetscBTLookup(change_basis,total_counts)) { 3849 /* get constraint info */ 3850 primal_dofs = constraints_n[total_counts]; 3851 dual_dofs = size_of_constraint-primal_dofs; 3852 3853 if (pcbddc->dbg_flag) { 3854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3855 } 3856 3857 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3858 3859 /* copy quadrature constraints for change of basis check */ 3860 if (pcbddc->dbg_flag) { 3861 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3862 } 3863 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3864 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3865 3866 /* compute QR decomposition of constraints */ 3867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3868 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3871 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3872 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3874 3875 /* explictly compute R^-T */ 3876 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3877 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3878 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3879 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3881 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3882 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3883 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3884 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3885 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3886 3887 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3889 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3890 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3891 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3892 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3893 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3894 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3895 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3896 3897 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3898 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3899 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3901 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3902 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3903 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3904 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3905 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3906 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3907 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3908 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3909 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3910 3911 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3912 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3913 /* insert cols for primal dofs */ 3914 for (j=0;j<primal_dofs;j++) { 3915 start_vals = &qr_basis[j*size_of_constraint]; 3916 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3917 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3918 } 3919 /* insert cols for dual dofs */ 3920 for (j=0,k=0;j<dual_dofs;k++) { 3921 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3922 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3923 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3924 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3925 j++; 3926 } 3927 } 3928 3929 /* check change of basis */ 3930 if (pcbddc->dbg_flag) { 3931 PetscInt ii,jj; 3932 PetscBool valid_qr=PETSC_TRUE; 3933 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3938 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3940 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3942 for (jj=0;jj<size_of_constraint;jj++) { 3943 for (ii=0;ii<primal_dofs;ii++) { 3944 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3945 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3946 } 3947 } 3948 if (!valid_qr) { 3949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3950 for (jj=0;jj<size_of_constraint;jj++) { 3951 for (ii=0;ii<primal_dofs;ii++) { 3952 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3954 } 3955 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3956 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3957 } 3958 } 3959 } 3960 } else { 3961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3962 } 3963 } 3964 } else { /* simple transformation block */ 3965 PetscInt row,col; 3966 PetscScalar val,norm; 3967 3968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3969 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3970 for (j=0;j<size_of_constraint;j++) { 3971 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3972 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3973 if (!PetscBTLookup(is_primal,row_B)) { 3974 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3975 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3976 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3977 } else { 3978 for (k=0;k<size_of_constraint;k++) { 3979 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3980 if (row != col) { 3981 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3982 } else { 3983 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3984 } 3985 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3986 } 3987 } 3988 } 3989 if (pcbddc->dbg_flag) { 3990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3991 } 3992 } 3993 } else { 3994 if (pcbddc->dbg_flag) { 3995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3996 } 3997 } 3998 } 3999 4000 /* free workspace */ 4001 if (qr_needed) { 4002 if (pcbddc->dbg_flag) { 4003 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4004 } 4005 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4006 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4007 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4008 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4009 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4010 } 4011 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4012 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4013 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4014 4015 /* assembling of global change of variable */ 4016 { 4017 Mat tmat; 4018 PetscInt bs; 4019 4020 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4021 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4022 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4023 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4024 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4025 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4026 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4027 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4028 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4029 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4030 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4031 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4032 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4033 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4034 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4035 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4036 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4037 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4038 } 4039 /* check */ 4040 if (pcbddc->dbg_flag) { 4041 PetscReal error; 4042 Vec x,x_change; 4043 4044 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4045 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4046 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4047 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4048 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4049 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4050 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4051 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4052 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4053 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4054 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4055 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4058 ierr = VecDestroy(&x);CHKERRQ(ierr); 4059 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4060 } 4061 4062 /* adapt sub_schurs computed (if any) */ 4063 if (pcbddc->use_deluxe_scaling) { 4064 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4065 if (sub_schurs->S_Ej_all) { 4066 Mat S_new,tmat; 4067 ISLocalToGlobalMapping NtoSall; 4068 IS is_all_N,is_V,is_V_Sall; 4069 const PetscScalar *array; 4070 const PetscInt *idxs_V,*idxs_all; 4071 PetscInt i,n_V; 4072 4073 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4074 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4075 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4076 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4077 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4078 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4079 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4080 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4081 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4082 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4083 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4084 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4085 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4086 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4087 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4088 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4089 for (i=0;i<n_V;i++) { 4090 PetscScalar val; 4091 PetscInt idx; 4092 4093 idx = idxs_V[i]; 4094 val = array[idxs_all[idxs_V[i]]]; 4095 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4096 } 4097 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4098 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4099 sub_schurs->S_Ej_all = S_new; 4100 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4101 if (sub_schurs->sum_S_Ej_all) { 4102 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4103 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4104 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4105 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4106 sub_schurs->sum_S_Ej_all = S_new; 4107 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4108 } 4109 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4110 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4111 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4112 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4113 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4114 } 4115 } 4116 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4117 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4118 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4119 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4120 } 4121 4122 /* set up change of basis context */ 4123 if (pcbddc->ChangeOfBasisMatrix) { 4124 PCBDDCChange_ctx change_ctx; 4125 4126 if (!pcbddc->new_global_mat) { 4127 PetscInt global_size,local_size; 4128 4129 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4130 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4131 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4132 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4133 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4134 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4135 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4136 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4137 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4138 } else { 4139 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4140 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4141 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4142 } 4143 if (!pcbddc->user_ChangeOfBasisMatrix) { 4144 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4145 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4146 } else { 4147 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4148 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4149 } 4150 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4151 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4152 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 } else { 4155 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4156 } 4157 4158 /* add pressure dofs to set of primal nodes for numbering purposes */ 4159 for (i=0;i<pcbddc->benign_n;i++) { 4160 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4161 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4162 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4163 pcbddc->local_primal_size_cc++; 4164 pcbddc->local_primal_size++; 4165 } 4166 4167 /* check if a new primal space has been introduced (also take into account benign trick) */ 4168 pcbddc->new_primal_space_local = PETSC_TRUE; 4169 if (olocal_primal_size == pcbddc->local_primal_size) { 4170 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4171 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4172 if (!pcbddc->new_primal_space_local) { 4173 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4174 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4175 } 4176 } 4177 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4178 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4179 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4180 4181 /* flush dbg viewer */ 4182 if (pcbddc->dbg_flag) { 4183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 } 4185 4186 /* free workspace */ 4187 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4188 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4189 if (!pcbddc->adaptive_selection) { 4190 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4191 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4192 } else { 4193 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4194 pcbddc->adaptive_constraints_idxs_ptr, 4195 pcbddc->adaptive_constraints_data_ptr, 4196 pcbddc->adaptive_constraints_idxs, 4197 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4198 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4199 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4200 } 4201 PetscFunctionReturn(0); 4202 } 4203 4204 #undef __FUNCT__ 4205 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4206 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4207 { 4208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4209 PC_IS *pcis = (PC_IS*)pc->data; 4210 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4211 PetscInt ierr,i,vertex_size,N; 4212 PetscViewer viewer=pcbddc->dbg_viewer; 4213 4214 PetscFunctionBegin; 4215 /* Reset previously computed graph */ 4216 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4217 /* Init local Graph struct */ 4218 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4219 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4220 4221 /* Check validity of the csr graph passed in by the user */ 4222 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4224 } 4225 4226 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4227 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 4228 PetscInt *xadj,*adjncy; 4229 PetscInt nvtxs; 4230 PetscBool flg_row=PETSC_FALSE; 4231 4232 if (pcbddc->use_local_adj) { 4233 4234 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4235 if (flg_row) { 4236 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4237 pcbddc->computed_rowadj = PETSC_TRUE; 4238 } 4239 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4240 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 4241 IS is_dummy; 4242 ISLocalToGlobalMapping l2gmap_dummy; 4243 PetscInt j,sum; 4244 PetscInt *cxadj,*cadjncy; 4245 const PetscInt *idxs; 4246 PCBDDCGraph graph; 4247 PetscBT is_on_boundary; 4248 4249 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 4250 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 4251 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4252 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4253 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 4254 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 4255 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4256 if (flg_row) { 4257 graph->xadj = xadj; 4258 graph->adjncy = adjncy; 4259 } 4260 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 4261 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4262 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4263 4264 if (pcbddc->dbg_flag) { 4265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 4266 for (i=0;i<graph->ncc;i++) { 4267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 4268 } 4269 } 4270 4271 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 4272 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4273 for (i=0;i<pcis->n_B;i++) { 4274 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 4275 } 4276 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4277 4278 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 4279 sum = 0; 4280 for (i=0;i<graph->ncc;i++) { 4281 PetscInt sizecc = 0; 4282 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4283 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4284 sizecc++; 4285 } 4286 } 4287 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4288 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4289 cxadj[graph->queue[j]] = sizecc; 4290 } 4291 } 4292 sum += sizecc*sizecc; 4293 } 4294 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 4295 sum = 0; 4296 for (i=0;i<pcis->n;i++) { 4297 PetscInt temp = cxadj[i]; 4298 cxadj[i] = sum; 4299 sum += temp; 4300 } 4301 cxadj[pcis->n] = sum; 4302 for (i=0;i<graph->ncc;i++) { 4303 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4304 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4305 PetscInt k,sizecc = 0; 4306 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 4307 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 4308 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 4309 sizecc++; 4310 } 4311 } 4312 } 4313 } 4314 } 4315 if (sum) { 4316 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 4317 } else { 4318 ierr = PetscFree(cxadj);CHKERRQ(ierr); 4319 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 4320 } 4321 graph->xadj = 0; 4322 graph->adjncy = 0; 4323 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4324 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 4325 } 4326 } 4327 if (pcbddc->dbg_flag) { 4328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4329 } 4330 4331 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4332 vertex_size = 1; 4333 if (pcbddc->user_provided_isfordofs) { 4334 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4335 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4336 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4337 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4338 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4339 } 4340 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4341 pcbddc->n_ISForDofs = 0; 4342 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4343 } 4344 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4345 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4346 } else { 4347 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4348 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4349 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4350 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4351 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4352 } 4353 } 4354 } 4355 4356 /* Setup of Graph */ 4357 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4358 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4359 } 4360 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4361 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4362 } 4363 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4364 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4365 } 4366 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4367 4368 /* attach info on disconnected subdomains if present */ 4369 if (pcbddc->n_local_subs) { 4370 PetscInt *local_subs; 4371 4372 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4373 for (i=0;i<pcbddc->n_local_subs;i++) { 4374 const PetscInt *idxs; 4375 PetscInt nl,j; 4376 4377 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4378 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4379 for (j=0;j<nl;j++) { 4380 local_subs[idxs[j]] = i; 4381 } 4382 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4383 } 4384 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4385 pcbddc->mat_graph->local_subs = local_subs; 4386 } 4387 4388 /* Graph's connected components analysis */ 4389 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4390 4391 /* print some info to stdout */ 4392 if (pcbddc->dbg_flag) { 4393 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4394 } 4395 4396 /* mark topography has done */ 4397 pcbddc->recompute_topography = PETSC_FALSE; 4398 PetscFunctionReturn(0); 4399 } 4400 4401 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "PCBDDCSubsetNumbering" 4404 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4405 { 4406 PetscSF sf; 4407 PetscLayout map; 4408 const PetscInt *idxs; 4409 PetscInt *leaf_data,*root_data,*gidxs; 4410 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4411 PetscInt n_n,nlocals,start,first_index; 4412 PetscMPIInt commsize; 4413 PetscBool first_found; 4414 PetscErrorCode ierr; 4415 4416 PetscFunctionBegin; 4417 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4418 if (subset_mult) { 4419 PetscCheckSameComm(subset,1,subset_mult,2); 4420 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4421 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4422 } 4423 /* create workspace layout for computing global indices of subset */ 4424 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4425 lbounds[0] = lbounds[1] = 0; 4426 for (i=0;i<n;i++) { 4427 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4428 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4429 } 4430 lbounds[0] = -lbounds[0]; 4431 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4432 gbounds[0] = -gbounds[0]; 4433 N = gbounds[1] - gbounds[0] + 1; 4434 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4435 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4436 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4437 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4438 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4439 4440 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4441 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4442 if (subset_mult) { 4443 const PetscInt* idxs_mult; 4444 4445 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4446 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4447 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4448 } else { 4449 for (i=0;i<n;i++) leaf_data[i] = 1; 4450 } 4451 /* local size of new subset */ 4452 n_n = 0; 4453 for (i=0;i<n;i++) n_n += leaf_data[i]; 4454 4455 /* global indexes in layout */ 4456 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4457 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4458 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4459 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4460 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4461 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4462 4463 /* reduce from leaves to roots */ 4464 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4465 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4466 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4467 4468 /* count indexes in local part of layout */ 4469 nlocals = 0; 4470 first_index = -1; 4471 first_found = PETSC_FALSE; 4472 for (i=0;i<Nl;i++) { 4473 if (!first_found && root_data[i]) { 4474 first_found = PETSC_TRUE; 4475 first_index = i; 4476 } 4477 nlocals += root_data[i]; 4478 } 4479 4480 /* cumulative of number of indexes and size of subset without holes */ 4481 #if defined(PETSC_HAVE_MPI_EXSCAN) 4482 start = 0; 4483 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4484 #else 4485 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4486 start = start-nlocals; 4487 #endif 4488 4489 if (N_n) { /* compute total size of new subset if requested */ 4490 *N_n = start + nlocals; 4491 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4492 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4493 } 4494 4495 /* adapt root data with cumulative */ 4496 if (first_found) { 4497 PetscInt old_index; 4498 4499 root_data[first_index] += start; 4500 old_index = first_index; 4501 for (i=first_index+1;i<Nl;i++) { 4502 if (root_data[i]) { 4503 root_data[i] += root_data[old_index]; 4504 old_index = i; 4505 } 4506 } 4507 } 4508 4509 /* from roots to leaves */ 4510 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4511 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4512 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4513 4514 /* create new IS with global indexes without holes */ 4515 if (subset_mult) { 4516 const PetscInt* idxs_mult; 4517 PetscInt cum; 4518 4519 cum = 0; 4520 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4521 for (i=0;i<n;i++) { 4522 PetscInt j; 4523 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4524 } 4525 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4526 } else { 4527 for (i=0;i<n;i++) { 4528 gidxs[i] = leaf_data[i]-1; 4529 } 4530 } 4531 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4532 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4533 PetscFunctionReturn(0); 4534 } 4535 4536 #undef __FUNCT__ 4537 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4538 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4539 { 4540 PetscInt i,j; 4541 PetscScalar *alphas; 4542 PetscErrorCode ierr; 4543 4544 PetscFunctionBegin; 4545 /* this implements stabilized Gram-Schmidt */ 4546 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4547 for (i=0;i<n;i++) { 4548 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4549 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4550 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4551 } 4552 ierr = PetscFree(alphas);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 #undef __FUNCT__ 4557 #define __FUNCT__ "MatISGetSubassemblingPattern" 4558 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4559 { 4560 IS ranks_send_to; 4561 PetscInt n_neighs,*neighs,*n_shared,**shared; 4562 PetscMPIInt size,rank,color; 4563 PetscInt *xadj,*adjncy; 4564 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4565 PetscInt i,local_size,threshold=0; 4566 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4567 PetscSubcomm subcomm; 4568 PetscErrorCode ierr; 4569 4570 PetscFunctionBegin; 4571 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4572 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4573 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4574 4575 /* Get info on mapping */ 4576 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4577 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4578 4579 /* build local CSR graph of subdomains' connectivity */ 4580 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4581 xadj[0] = 0; 4582 xadj[1] = PetscMax(n_neighs-1,0); 4583 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4584 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4585 4586 if (threshold) { 4587 PetscInt xadj_count = 0; 4588 for (i=1;i<n_neighs;i++) { 4589 if (n_shared[i] > threshold) { 4590 adjncy[xadj_count] = neighs[i]; 4591 adjncy_wgt[xadj_count] = n_shared[i]; 4592 xadj_count++; 4593 } 4594 } 4595 xadj[1] = xadj_count; 4596 } else { 4597 if (xadj[1]) { 4598 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4599 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4600 } 4601 } 4602 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4603 if (use_square) { 4604 for (i=0;i<xadj[1];i++) { 4605 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4606 } 4607 } 4608 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4609 4610 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4611 4612 /* 4613 Restrict work on active processes only. 4614 */ 4615 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4616 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4617 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4618 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4619 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4620 if (color) { 4621 ierr = PetscFree(xadj);CHKERRQ(ierr); 4622 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4623 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4624 } else { 4625 Mat subdomain_adj; 4626 IS new_ranks,new_ranks_contig; 4627 MatPartitioning partitioner; 4628 PetscInt prank,rstart=0,rend=0; 4629 PetscInt *is_indices,*oldranks; 4630 PetscBool aggregate; 4631 4632 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4634 prank = rank; 4635 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4636 /* 4637 for (i=0;i<size;i++) { 4638 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4639 } 4640 */ 4641 for (i=0;i<xadj[1];i++) { 4642 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4643 } 4644 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4645 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4646 if (aggregate) { 4647 PetscInt lrows,row,ncols,*cols; 4648 PetscMPIInt nrank; 4649 PetscScalar *vals; 4650 4651 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4652 lrows = 0; 4653 if (nrank<redprocs) { 4654 lrows = size/redprocs; 4655 if (nrank<size%redprocs) lrows++; 4656 } 4657 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4658 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4659 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4660 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4661 row = nrank; 4662 ncols = xadj[1]-xadj[0]; 4663 cols = adjncy; 4664 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4665 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4666 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4667 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4668 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4669 ierr = PetscFree(xadj);CHKERRQ(ierr); 4670 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4671 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4672 ierr = PetscFree(vals);CHKERRQ(ierr); 4673 } else { 4674 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4675 } 4676 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4677 4678 /* Partition */ 4679 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4680 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4681 if (use_vwgt) { 4682 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4683 v_wgt[0] = local_size; 4684 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4685 } 4686 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4687 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4688 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4689 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4690 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4691 4692 /* renumber new_ranks to avoid "holes" in new set of processors */ 4693 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4694 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4695 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4696 if (!redprocs) { 4697 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4698 } else { 4699 PetscInt idxs[1]; 4700 PetscMPIInt tag; 4701 MPI_Request *reqs; 4702 4703 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4704 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4705 for (i=rstart;i<rend;i++) { 4706 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4707 } 4708 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4709 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4710 ierr = PetscFree(reqs);CHKERRQ(ierr); 4711 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4712 } 4713 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4714 /* clean up */ 4715 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4716 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4717 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4718 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4719 } 4720 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4721 4722 /* assemble parallel IS for sends */ 4723 i = 1; 4724 if (color) i=0; 4725 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4726 /* get back IS */ 4727 *is_sends = ranks_send_to; 4728 PetscFunctionReturn(0); 4729 } 4730 4731 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4732 4733 #undef __FUNCT__ 4734 #define __FUNCT__ "MatISSubassemble" 4735 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4736 { 4737 Mat local_mat; 4738 IS is_sends_internal; 4739 PetscInt rows,cols,new_local_rows; 4740 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4741 PetscBool ismatis,isdense,newisdense,destroy_mat; 4742 ISLocalToGlobalMapping l2gmap; 4743 PetscInt* l2gmap_indices; 4744 const PetscInt* is_indices; 4745 MatType new_local_type; 4746 /* buffers */ 4747 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4748 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4749 PetscInt *recv_buffer_idxs_local; 4750 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4751 /* MPI */ 4752 MPI_Comm comm,comm_n; 4753 PetscSubcomm subcomm; 4754 PetscMPIInt n_sends,n_recvs,commsize; 4755 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4756 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4757 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4758 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4759 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4760 PetscErrorCode ierr; 4761 4762 PetscFunctionBegin; 4763 /* TODO: add missing checks */ 4764 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4765 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4766 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4767 PetscValidLogicalCollectiveInt(mat,nis,7); 4768 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4769 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4770 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4771 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4772 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4773 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4774 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4775 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4776 PetscInt mrows,mcols,mnrows,mncols; 4777 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4778 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4779 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4780 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4781 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4782 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4783 } 4784 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4785 PetscValidLogicalCollectiveInt(mat,bs,0); 4786 /* prepare IS for sending if not provided */ 4787 if (!is_sends) { 4788 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4789 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4790 } else { 4791 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4792 is_sends_internal = is_sends; 4793 } 4794 4795 /* get comm */ 4796 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4797 4798 /* compute number of sends */ 4799 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4800 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4801 4802 /* compute number of receives */ 4803 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4804 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4805 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4806 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4807 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4808 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4809 ierr = PetscFree(iflags);CHKERRQ(ierr); 4810 4811 /* restrict comm if requested */ 4812 subcomm = 0; 4813 destroy_mat = PETSC_FALSE; 4814 if (restrict_comm) { 4815 PetscMPIInt color,subcommsize; 4816 4817 color = 0; 4818 if (restrict_full) { 4819 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4820 } else { 4821 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4822 } 4823 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4824 subcommsize = commsize - subcommsize; 4825 /* check if reuse has been requested */ 4826 if (reuse == MAT_REUSE_MATRIX) { 4827 if (*mat_n) { 4828 PetscMPIInt subcommsize2; 4829 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4830 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4831 comm_n = PetscObjectComm((PetscObject)*mat_n); 4832 } else { 4833 comm_n = PETSC_COMM_SELF; 4834 } 4835 } else { /* MAT_INITIAL_MATRIX */ 4836 PetscMPIInt rank; 4837 4838 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4839 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4840 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4841 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4842 comm_n = PetscSubcommChild(subcomm); 4843 } 4844 /* flag to destroy *mat_n if not significative */ 4845 if (color) destroy_mat = PETSC_TRUE; 4846 } else { 4847 comm_n = comm; 4848 } 4849 4850 /* prepare send/receive buffers */ 4851 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4852 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4853 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4854 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4855 if (nis) { 4856 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4857 } 4858 4859 /* Get data from local matrices */ 4860 if (!isdense) { 4861 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4862 /* TODO: See below some guidelines on how to prepare the local buffers */ 4863 /* 4864 send_buffer_vals should contain the raw values of the local matrix 4865 send_buffer_idxs should contain: 4866 - MatType_PRIVATE type 4867 - PetscInt size_of_l2gmap 4868 - PetscInt global_row_indices[size_of_l2gmap] 4869 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4870 */ 4871 } else { 4872 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4873 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4874 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4875 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4876 send_buffer_idxs[1] = i; 4877 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4878 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4879 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4880 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4881 for (i=0;i<n_sends;i++) { 4882 ilengths_vals[is_indices[i]] = len*len; 4883 ilengths_idxs[is_indices[i]] = len+2; 4884 } 4885 } 4886 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4887 /* additional is (if any) */ 4888 if (nis) { 4889 PetscMPIInt psum; 4890 PetscInt j; 4891 for (j=0,psum=0;j<nis;j++) { 4892 PetscInt plen; 4893 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4894 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4895 psum += len+1; /* indices + lenght */ 4896 } 4897 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4898 for (j=0,psum=0;j<nis;j++) { 4899 PetscInt plen; 4900 const PetscInt *is_array_idxs; 4901 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4902 send_buffer_idxs_is[psum] = plen; 4903 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4904 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4905 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4906 psum += plen+1; /* indices + lenght */ 4907 } 4908 for (i=0;i<n_sends;i++) { 4909 ilengths_idxs_is[is_indices[i]] = psum; 4910 } 4911 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4912 } 4913 4914 buf_size_idxs = 0; 4915 buf_size_vals = 0; 4916 buf_size_idxs_is = 0; 4917 for (i=0;i<n_recvs;i++) { 4918 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4919 buf_size_vals += (PetscInt)olengths_vals[i]; 4920 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4921 } 4922 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4923 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4924 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4925 4926 /* get new tags for clean communications */ 4927 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4928 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4929 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4930 4931 /* allocate for requests */ 4932 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4935 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4936 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4937 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4938 4939 /* communications */ 4940 ptr_idxs = recv_buffer_idxs; 4941 ptr_vals = recv_buffer_vals; 4942 ptr_idxs_is = recv_buffer_idxs_is; 4943 for (i=0;i<n_recvs;i++) { 4944 source_dest = onodes[i]; 4945 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4946 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4947 ptr_idxs += olengths_idxs[i]; 4948 ptr_vals += olengths_vals[i]; 4949 if (nis) { 4950 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4951 ptr_idxs_is += olengths_idxs_is[i]; 4952 } 4953 } 4954 for (i=0;i<n_sends;i++) { 4955 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4956 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4957 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4958 if (nis) { 4959 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4960 } 4961 } 4962 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4963 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4964 4965 /* assemble new l2g map */ 4966 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4967 ptr_idxs = recv_buffer_idxs; 4968 new_local_rows = 0; 4969 for (i=0;i<n_recvs;i++) { 4970 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4971 ptr_idxs += olengths_idxs[i]; 4972 } 4973 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4974 ptr_idxs = recv_buffer_idxs; 4975 new_local_rows = 0; 4976 for (i=0;i<n_recvs;i++) { 4977 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4978 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4979 ptr_idxs += olengths_idxs[i]; 4980 } 4981 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4982 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4983 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4984 4985 /* infer new local matrix type from received local matrices type */ 4986 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4987 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4988 if (n_recvs) { 4989 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4990 ptr_idxs = recv_buffer_idxs; 4991 for (i=0;i<n_recvs;i++) { 4992 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4993 new_local_type_private = MATAIJ_PRIVATE; 4994 break; 4995 } 4996 ptr_idxs += olengths_idxs[i]; 4997 } 4998 switch (new_local_type_private) { 4999 case MATDENSE_PRIVATE: 5000 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5001 new_local_type = MATSEQAIJ; 5002 bs = 1; 5003 } else { /* if I receive only 1 dense matrix */ 5004 new_local_type = MATSEQDENSE; 5005 bs = 1; 5006 } 5007 break; 5008 case MATAIJ_PRIVATE: 5009 new_local_type = MATSEQAIJ; 5010 bs = 1; 5011 break; 5012 case MATBAIJ_PRIVATE: 5013 new_local_type = MATSEQBAIJ; 5014 break; 5015 case MATSBAIJ_PRIVATE: 5016 new_local_type = MATSEQSBAIJ; 5017 break; 5018 default: 5019 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5020 break; 5021 } 5022 } else { /* by default, new_local_type is seqdense */ 5023 new_local_type = MATSEQDENSE; 5024 bs = 1; 5025 } 5026 5027 /* create MATIS object if needed */ 5028 if (reuse == MAT_INITIAL_MATRIX) { 5029 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5030 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5031 } else { 5032 /* it also destroys the local matrices */ 5033 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5034 } 5035 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5036 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5037 5038 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5039 5040 /* Global to local map of received indices */ 5041 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5042 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5043 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5044 5045 /* restore attributes -> type of incoming data and its size */ 5046 buf_size_idxs = 0; 5047 for (i=0;i<n_recvs;i++) { 5048 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5049 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5050 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5051 } 5052 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5053 5054 /* set preallocation */ 5055 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5056 if (!newisdense) { 5057 PetscInt *new_local_nnz=0; 5058 5059 ptr_vals = recv_buffer_vals; 5060 ptr_idxs = recv_buffer_idxs_local; 5061 if (n_recvs) { 5062 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5063 } 5064 for (i=0;i<n_recvs;i++) { 5065 PetscInt j; 5066 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5067 for (j=0;j<*(ptr_idxs+1);j++) { 5068 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5069 } 5070 } else { 5071 /* TODO */ 5072 } 5073 ptr_idxs += olengths_idxs[i]; 5074 } 5075 if (new_local_nnz) { 5076 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5077 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5078 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5079 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5080 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5081 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5082 } else { 5083 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5084 } 5085 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5086 } else { 5087 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5088 } 5089 5090 /* set values */ 5091 ptr_vals = recv_buffer_vals; 5092 ptr_idxs = recv_buffer_idxs_local; 5093 for (i=0;i<n_recvs;i++) { 5094 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5095 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5096 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5097 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5098 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5099 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5100 } else { 5101 /* TODO */ 5102 } 5103 ptr_idxs += olengths_idxs[i]; 5104 ptr_vals += olengths_vals[i]; 5105 } 5106 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5107 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5108 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5109 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5110 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5111 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5112 5113 #if 0 5114 if (!restrict_comm) { /* check */ 5115 Vec lvec,rvec; 5116 PetscReal infty_error; 5117 5118 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5119 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5120 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5121 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5122 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5123 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5124 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5125 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5126 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5127 } 5128 #endif 5129 5130 /* assemble new additional is (if any) */ 5131 if (nis) { 5132 PetscInt **temp_idxs,*count_is,j,psum; 5133 5134 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5135 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5136 ptr_idxs = recv_buffer_idxs_is; 5137 psum = 0; 5138 for (i=0;i<n_recvs;i++) { 5139 for (j=0;j<nis;j++) { 5140 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5141 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5142 psum += plen; 5143 ptr_idxs += plen+1; /* shift pointer to received data */ 5144 } 5145 } 5146 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5147 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5148 for (i=1;i<nis;i++) { 5149 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5150 } 5151 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5152 ptr_idxs = recv_buffer_idxs_is; 5153 for (i=0;i<n_recvs;i++) { 5154 for (j=0;j<nis;j++) { 5155 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5156 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5157 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5158 ptr_idxs += plen+1; /* shift pointer to received data */ 5159 } 5160 } 5161 for (i=0;i<nis;i++) { 5162 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5163 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5164 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5165 } 5166 ierr = PetscFree(count_is);CHKERRQ(ierr); 5167 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5168 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5169 } 5170 /* free workspace */ 5171 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5172 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5173 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5174 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5175 if (isdense) { 5176 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5177 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5178 } else { 5179 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5180 } 5181 if (nis) { 5182 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5183 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5184 } 5185 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5186 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5187 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5188 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5189 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5190 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5191 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5192 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5193 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5194 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5195 ierr = PetscFree(onodes);CHKERRQ(ierr); 5196 if (nis) { 5197 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5198 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5199 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5200 } 5201 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5202 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5203 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5204 for (i=0;i<nis;i++) { 5205 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5206 } 5207 *mat_n = NULL; 5208 } 5209 PetscFunctionReturn(0); 5210 } 5211 5212 /* temporary hack into ksp private data structure */ 5213 #include <petsc/private/kspimpl.h> 5214 5215 #undef __FUNCT__ 5216 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5217 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5218 { 5219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5220 PC_IS *pcis = (PC_IS*)pc->data; 5221 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5222 MatNullSpace CoarseNullSpace=NULL; 5223 ISLocalToGlobalMapping coarse_islg; 5224 IS coarse_is,*isarray; 5225 PetscInt i,im_active=-1,active_procs=-1; 5226 PetscInt nis,nisdofs,nisneu,nisvert; 5227 PC pc_temp; 5228 PCType coarse_pc_type; 5229 KSPType coarse_ksp_type; 5230 PetscBool multilevel_requested,multilevel_allowed; 5231 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5232 Mat t_coarse_mat_is; 5233 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5234 PetscMPIInt all_procs; 5235 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5236 PetscBool compute_vecs = PETSC_FALSE; 5237 PetscScalar *array; 5238 PetscErrorCode ierr; 5239 5240 PetscFunctionBegin; 5241 /* Assign global numbering to coarse dofs */ 5242 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5243 PetscInt ocoarse_size; 5244 compute_vecs = PETSC_TRUE; 5245 ocoarse_size = pcbddc->coarse_size; 5246 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5247 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5248 /* see if we can avoid some work */ 5249 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5250 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5251 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5252 PC pc; 5253 PetscBool isbddc; 5254 5255 /* temporary workaround since PCBDDC does not have a reset method so far */ 5256 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5257 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5258 if (isbddc) { 5259 ierr = PCDestroy(&pc);CHKERRQ(ierr); 5260 } 5261 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5262 coarse_reuse = PETSC_FALSE; 5263 } else { /* we can safely reuse already computed coarse matrix */ 5264 coarse_reuse = PETSC_TRUE; 5265 } 5266 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5267 coarse_reuse = PETSC_FALSE; 5268 } 5269 /* reset any subassembling information */ 5270 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5271 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5272 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5273 coarse_reuse = PETSC_TRUE; 5274 } 5275 5276 /* count "active" (i.e. with positive local size) and "void" processes */ 5277 im_active = !!(pcis->n); 5278 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5279 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5280 void_procs = all_procs-active_procs; 5281 csin_type_simple = PETSC_TRUE; 5282 redist = PETSC_FALSE; 5283 if (pcbddc->current_level && void_procs) { 5284 csin_ml = PETSC_TRUE; 5285 ncoarse_ml = void_procs; 5286 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5287 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5288 csin_ds = PETSC_TRUE; 5289 ncoarse_ds = pcbddc->redistribute_coarse; 5290 redist = PETSC_TRUE; 5291 } else { 5292 csin_ds = PETSC_TRUE; 5293 ncoarse_ds = active_procs; 5294 redist = PETSC_TRUE; 5295 } 5296 } else { 5297 csin_ml = PETSC_FALSE; 5298 ncoarse_ml = all_procs; 5299 if (void_procs) { 5300 csin_ds = PETSC_TRUE; 5301 ncoarse_ds = void_procs; 5302 csin_type_simple = PETSC_FALSE; 5303 } else { 5304 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5305 csin_ds = PETSC_TRUE; 5306 ncoarse_ds = pcbddc->redistribute_coarse; 5307 redist = PETSC_TRUE; 5308 } else { 5309 csin_ds = PETSC_FALSE; 5310 ncoarse_ds = all_procs; 5311 } 5312 } 5313 } 5314 5315 /* 5316 test if we can go multilevel: three conditions must be satisfied: 5317 - we have not exceeded the number of levels requested 5318 - we can actually subassemble the active processes 5319 - we can find a suitable number of MPI processes where we can place the subassembled problem 5320 */ 5321 multilevel_allowed = PETSC_FALSE; 5322 multilevel_requested = PETSC_FALSE; 5323 if (pcbddc->current_level < pcbddc->max_levels) { 5324 multilevel_requested = PETSC_TRUE; 5325 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5326 multilevel_allowed = PETSC_FALSE; 5327 } else { 5328 multilevel_allowed = PETSC_TRUE; 5329 } 5330 } 5331 /* determine number of process partecipating to coarse solver */ 5332 if (multilevel_allowed) { 5333 ncoarse = ncoarse_ml; 5334 csin = csin_ml; 5335 redist = PETSC_FALSE; 5336 } else { 5337 ncoarse = ncoarse_ds; 5338 csin = csin_ds; 5339 } 5340 5341 /* creates temporary l2gmap and IS for coarse indexes */ 5342 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5343 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5344 5345 /* creates temporary MATIS object for coarse matrix */ 5346 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5347 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5348 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5349 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5350 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5351 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5352 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5353 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5354 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5355 5356 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5357 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5358 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5359 const PetscInt *idxs; 5360 ISLocalToGlobalMapping tmap; 5361 5362 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5363 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5364 /* allocate space for temporary storage */ 5365 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5366 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5367 /* allocate for IS array */ 5368 nisdofs = pcbddc->n_ISForDofsLocal; 5369 nisneu = !!pcbddc->NeumannBoundariesLocal; 5370 nisvert = 0; 5371 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5372 nisvert = 1; 5373 } 5374 nis = nisdofs + nisneu + nisvert; 5375 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5376 /* dofs splitting */ 5377 for (i=0;i<nisdofs;i++) { 5378 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5379 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5380 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5381 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5382 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5383 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5385 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5386 } 5387 /* neumann boundaries */ 5388 if (pcbddc->NeumannBoundariesLocal) { 5389 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5390 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5391 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5392 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5393 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5394 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5396 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5397 } 5398 /* primal vertices (benign) */ 5399 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5400 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5401 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5402 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5403 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5404 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5405 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5406 } 5407 /* free memory */ 5408 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5409 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5410 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5411 } else { 5412 nis = 0; 5413 nisdofs = 0; 5414 nisneu = 0; 5415 nisvert = 0; 5416 isarray = NULL; 5417 } 5418 /* destroy no longer needed map */ 5419 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5420 5421 /* restrict on coarse candidates (if needed) */ 5422 coarse_mat_is = NULL; 5423 if (csin) { 5424 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5425 if (redist) { 5426 PetscMPIInt rank; 5427 PetscInt spc,n_spc_p1,dest[1],destsize; 5428 5429 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5430 spc = active_procs/ncoarse; 5431 n_spc_p1 = active_procs%ncoarse; 5432 if (im_active) { 5433 destsize = 1; 5434 if (rank > n_spc_p1*(spc+1)-1) { 5435 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5436 } else { 5437 dest[0] = rank/(spc+1); 5438 } 5439 } else { 5440 destsize = 0; 5441 } 5442 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5443 } else if (csin_type_simple) { 5444 PetscMPIInt rank; 5445 PetscInt issize,isidx; 5446 5447 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5448 if (im_active) { 5449 issize = 1; 5450 isidx = (PetscInt)rank; 5451 } else { 5452 issize = 0; 5453 isidx = -1; 5454 } 5455 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5456 } else { /* get a suitable subassembling pattern from MATIS code */ 5457 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5458 } 5459 5460 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5461 if (!redist || ncoarse <= void_procs) { 5462 PetscInt ncoarse_cand,tissize,*nisindices; 5463 PetscInt *coarse_candidates; 5464 const PetscInt* tisindices; 5465 5466 /* get coarse candidates' ranks in pc communicator */ 5467 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5468 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5469 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5470 if (!coarse_candidates[i]) { 5471 coarse_candidates[ncoarse_cand++]=i; 5472 } 5473 } 5474 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5475 5476 5477 if (pcbddc->dbg_flag) { 5478 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5479 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5480 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5482 for (i=0;i<ncoarse_cand;i++) { 5483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5484 } 5485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5487 } 5488 /* shift the pattern on coarse candidates */ 5489 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5490 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5491 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5492 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5493 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5494 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5495 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5496 } 5497 if (pcbddc->dbg_flag) { 5498 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5500 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5502 } 5503 } 5504 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5505 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5506 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5507 } else { /* this is the last level, so use just receiving processes in subcomm */ 5508 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5509 } 5510 } else { 5511 if (pcbddc->dbg_flag) { 5512 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5514 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5515 } 5516 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5517 coarse_mat_is = t_coarse_mat_is; 5518 } 5519 5520 /* create local to global scatters for coarse problem */ 5521 if (compute_vecs) { 5522 PetscInt lrows; 5523 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5524 if (coarse_mat_is) { 5525 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5526 } else { 5527 lrows = 0; 5528 } 5529 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5530 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5531 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5532 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5533 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5534 } 5535 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5536 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5537 5538 /* set defaults for coarse KSP and PC */ 5539 if (multilevel_allowed) { 5540 coarse_ksp_type = KSPRICHARDSON; 5541 coarse_pc_type = PCBDDC; 5542 } else { 5543 coarse_ksp_type = KSPPREONLY; 5544 coarse_pc_type = PCREDUNDANT; 5545 } 5546 5547 /* print some info if requested */ 5548 if (pcbddc->dbg_flag) { 5549 if (!multilevel_allowed) { 5550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5551 if (multilevel_requested) { 5552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5553 } else if (pcbddc->max_levels) { 5554 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5555 } 5556 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5557 } 5558 } 5559 5560 /* create the coarse KSP object only once with defaults */ 5561 if (coarse_mat_is) { 5562 MatReuse coarse_mat_reuse; 5563 PetscViewer dbg_viewer = NULL; 5564 if (pcbddc->dbg_flag) { 5565 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5566 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5567 } 5568 if (!pcbddc->coarse_ksp) { 5569 char prefix[256],str_level[16]; 5570 size_t len; 5571 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5572 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5573 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5574 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5575 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5576 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5577 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5578 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5579 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5580 /* prefix */ 5581 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5582 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5583 if (!pcbddc->current_level) { 5584 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5585 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5586 } else { 5587 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5588 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5589 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5590 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5591 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5592 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5593 } 5594 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5595 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5596 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5597 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5598 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5599 /* allow user customization */ 5600 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5601 } 5602 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5603 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5604 if (nisdofs) { 5605 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5606 for (i=0;i<nisdofs;i++) { 5607 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5608 } 5609 } 5610 if (nisneu) { 5611 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5612 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5613 } 5614 if (nisvert) { 5615 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5616 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5617 } 5618 5619 /* get some info after set from options */ 5620 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5621 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5622 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5623 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5624 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5625 isbddc = PETSC_FALSE; 5626 } 5627 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5628 if (isredundant) { 5629 KSP inner_ksp; 5630 PC inner_pc; 5631 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5632 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5633 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5634 } 5635 5636 /* assemble coarse matrix */ 5637 if (coarse_reuse) { 5638 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5639 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5640 coarse_mat_reuse = MAT_REUSE_MATRIX; 5641 } else { 5642 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5643 } 5644 if (isbddc || isnn) { 5645 if (pcbddc->coarsening_ratio > 1) { 5646 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5647 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5648 if (pcbddc->dbg_flag) { 5649 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5650 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5651 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5652 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5653 } 5654 } 5655 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5656 } else { 5657 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5658 coarse_mat = coarse_mat_is; 5659 } 5660 } else { 5661 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5662 } 5663 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5664 5665 /* propagate symmetry info of coarse matrix */ 5666 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5667 if (pc->pmat->symmetric_set) { 5668 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5669 } 5670 if (pc->pmat->hermitian_set) { 5671 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5672 } 5673 if (pc->pmat->spd_set) { 5674 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5675 } 5676 /* set operators */ 5677 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5678 if (pcbddc->dbg_flag) { 5679 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5680 } 5681 } else { /* processes non partecipating to coarse solver (if any) */ 5682 coarse_mat = 0; 5683 } 5684 ierr = PetscFree(isarray);CHKERRQ(ierr); 5685 #if 0 5686 { 5687 PetscViewer viewer; 5688 char filename[256]; 5689 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5690 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5691 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5692 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5693 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5694 } 5695 #endif 5696 5697 /* Compute coarse null space (special handling by BDDC only) */ 5698 #if 0 5699 if (pcbddc->NullSpace) { 5700 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5701 } 5702 #endif 5703 /* hack */ 5704 if (pcbddc->coarse_ksp) { 5705 Vec crhs,csol; 5706 5707 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5708 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5709 if (!csol) { 5710 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5711 } 5712 if (!crhs) { 5713 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5714 } 5715 } 5716 5717 /* compute null space for coarse solver if the benign trick has been requested */ 5718 if (pcbddc->benign_null) { 5719 5720 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5721 for (i=0;i<pcbddc->benign_n;i++) { 5722 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5723 } 5724 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5725 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5726 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5727 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5728 if (coarse_mat) { 5729 Vec nullv; 5730 PetscScalar *array,*array2; 5731 PetscInt nl; 5732 5733 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5734 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5735 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5736 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5737 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5738 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5739 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5740 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5741 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5742 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5743 } 5744 } 5745 5746 if (pcbddc->coarse_ksp) { 5747 PetscBool ispreonly; 5748 5749 if (CoarseNullSpace) { 5750 PetscBool isnull; 5751 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5752 if (isnull) { 5753 if (isbddc && !pcbddc->benign_saddle_point) { 5754 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5755 } else { 5756 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5757 } 5758 } else { 5759 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5760 } 5761 } 5762 /* setup coarse ksp */ 5763 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5764 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5765 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5766 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5767 KSP check_ksp; 5768 KSPType check_ksp_type; 5769 PC check_pc; 5770 Vec check_vec,coarse_vec; 5771 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5772 PetscInt its; 5773 PetscBool compute_eigs; 5774 PetscReal *eigs_r,*eigs_c; 5775 PetscInt neigs; 5776 const char *prefix; 5777 5778 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5779 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5780 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5781 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5782 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5783 if (ispreonly) { 5784 check_ksp_type = KSPPREONLY; 5785 compute_eigs = PETSC_FALSE; 5786 } else { 5787 check_ksp_type = KSPGMRES; 5788 compute_eigs = PETSC_TRUE; 5789 } 5790 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5791 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5792 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5793 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5794 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5795 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5796 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5797 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5798 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5799 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5800 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5801 /* create random vec */ 5802 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5803 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5804 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5805 if (CoarseNullSpace) { 5806 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5807 } 5808 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5809 /* solve coarse problem */ 5810 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5811 if (CoarseNullSpace) { 5812 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5813 } 5814 /* set eigenvalue estimation if preonly has not been requested */ 5815 if (compute_eigs) { 5816 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5817 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5818 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5819 lambda_max = eigs_r[neigs-1]; 5820 lambda_min = eigs_r[0]; 5821 if (pcbddc->use_coarse_estimates) { 5822 if (lambda_max>lambda_min) { 5823 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5824 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5825 } 5826 } 5827 } 5828 5829 /* check coarse problem residual error */ 5830 if (pcbddc->dbg_flag) { 5831 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5832 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5833 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5834 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5835 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5836 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5837 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5838 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5839 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5840 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5841 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5842 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5843 if (CoarseNullSpace) { 5844 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5845 } 5846 if (compute_eigs) { 5847 PetscReal lambda_max_s,lambda_min_s; 5848 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5849 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5850 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5851 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5852 for (i=0;i<neigs;i++) { 5853 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5854 } 5855 } 5856 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5857 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5858 } 5859 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5860 if (compute_eigs) { 5861 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5862 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5863 } 5864 } 5865 } 5866 /* print additional info */ 5867 if (pcbddc->dbg_flag) { 5868 /* waits until all processes reaches this point */ 5869 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5872 } 5873 5874 /* free memory */ 5875 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5876 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5877 PetscFunctionReturn(0); 5878 } 5879 5880 #undef __FUNCT__ 5881 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5882 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5883 { 5884 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5885 PC_IS* pcis = (PC_IS*)pc->data; 5886 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5887 IS subset,subset_mult,subset_n; 5888 PetscInt local_size,coarse_size=0; 5889 PetscInt *local_primal_indices=NULL; 5890 const PetscInt *t_local_primal_indices; 5891 PetscErrorCode ierr; 5892 5893 PetscFunctionBegin; 5894 /* Compute global number of coarse dofs */ 5895 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5896 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5897 } 5898 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5899 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5900 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5901 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5902 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5903 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5904 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5905 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5906 if (local_size != pcbddc->local_primal_size) { 5907 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5908 } 5909 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5910 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5911 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5912 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5913 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5914 5915 /* check numbering */ 5916 if (pcbddc->dbg_flag) { 5917 PetscScalar coarsesum,*array,*array2; 5918 PetscInt i; 5919 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5920 5921 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5922 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5923 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5924 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5925 /* counter */ 5926 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5927 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5928 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5929 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5930 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5931 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5932 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5933 for (i=0;i<pcbddc->local_primal_size;i++) { 5934 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5935 } 5936 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5937 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5938 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5939 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5940 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5941 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5942 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5943 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5944 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5945 for (i=0;i<pcis->n;i++) { 5946 if (array[i] != 0.0 && array[i] != array2[i]) { 5947 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5948 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5949 set_error = PETSC_TRUE; 5950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5951 } 5952 } 5953 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5954 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5956 for (i=0;i<pcis->n;i++) { 5957 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5958 } 5959 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5960 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5961 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5962 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5963 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5965 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5966 PetscInt *gidxs; 5967 5968 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5969 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5973 for (i=0;i<pcbddc->local_primal_size;i++) { 5974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5975 } 5976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5977 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5978 } 5979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5980 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5981 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5982 } 5983 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5984 /* get back data */ 5985 *coarse_size_n = coarse_size; 5986 *local_primal_indices_n = local_primal_indices; 5987 PetscFunctionReturn(0); 5988 } 5989 5990 #undef __FUNCT__ 5991 #define __FUNCT__ "PCBDDCGlobalToLocal" 5992 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5993 { 5994 IS localis_t; 5995 PetscInt i,lsize,*idxs,n; 5996 PetscScalar *vals; 5997 PetscErrorCode ierr; 5998 5999 PetscFunctionBegin; 6000 /* get indices in local ordering exploiting local to global map */ 6001 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6002 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6003 for (i=0;i<lsize;i++) vals[i] = 1.0; 6004 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6005 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6006 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6007 if (idxs) { /* multilevel guard */ 6008 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6009 } 6010 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6011 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6012 ierr = PetscFree(vals);CHKERRQ(ierr); 6013 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6014 /* now compute set in local ordering */ 6015 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6016 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6017 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6018 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6019 for (i=0,lsize=0;i<n;i++) { 6020 if (PetscRealPart(vals[i]) > 0.5) { 6021 lsize++; 6022 } 6023 } 6024 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6025 for (i=0,lsize=0;i<n;i++) { 6026 if (PetscRealPart(vals[i]) > 0.5) { 6027 idxs[lsize++] = i; 6028 } 6029 } 6030 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6031 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6032 *localis = localis_t; 6033 PetscFunctionReturn(0); 6034 } 6035 6036 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6037 #undef __FUNCT__ 6038 #define __FUNCT__ "PCBDDCMatMult_Private" 6039 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6040 { 6041 PCBDDCChange_ctx change_ctx; 6042 PetscErrorCode ierr; 6043 6044 PetscFunctionBegin; 6045 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6046 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6047 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6048 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6049 PetscFunctionReturn(0); 6050 } 6051 6052 #undef __FUNCT__ 6053 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6054 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6055 { 6056 PCBDDCChange_ctx change_ctx; 6057 PetscErrorCode ierr; 6058 6059 PetscFunctionBegin; 6060 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6061 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6062 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6063 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6064 PetscFunctionReturn(0); 6065 } 6066 6067 #undef __FUNCT__ 6068 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6069 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6070 { 6071 PC_IS *pcis=(PC_IS*)pc->data; 6072 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6073 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6074 Mat S_j; 6075 PetscInt *used_xadj,*used_adjncy; 6076 PetscBool free_used_adj; 6077 PetscErrorCode ierr; 6078 6079 PetscFunctionBegin; 6080 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6081 free_used_adj = PETSC_FALSE; 6082 if (pcbddc->sub_schurs_layers == -1) { 6083 used_xadj = NULL; 6084 used_adjncy = NULL; 6085 } else { 6086 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6087 used_xadj = pcbddc->mat_graph->xadj; 6088 used_adjncy = pcbddc->mat_graph->adjncy; 6089 } else if (pcbddc->computed_rowadj) { 6090 used_xadj = pcbddc->mat_graph->xadj; 6091 used_adjncy = pcbddc->mat_graph->adjncy; 6092 } else { 6093 PetscBool flg_row=PETSC_FALSE; 6094 const PetscInt *xadj,*adjncy; 6095 PetscInt nvtxs; 6096 6097 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6098 if (flg_row) { 6099 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6100 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6101 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6102 free_used_adj = PETSC_TRUE; 6103 } else { 6104 pcbddc->sub_schurs_layers = -1; 6105 used_xadj = NULL; 6106 used_adjncy = NULL; 6107 } 6108 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6109 } 6110 } 6111 6112 /* setup sub_schurs data */ 6113 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6114 if (!sub_schurs->schur_explicit) { 6115 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6116 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6117 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6118 } else { 6119 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6120 PetscBool isseqaij; 6121 PetscInt benign_n; 6122 6123 if (!pcbddc->use_vertices && reuse_solvers) { 6124 PetscInt n_vertices; 6125 6126 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6127 reuse_solvers = (PetscBool)!n_vertices; 6128 } 6129 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6130 if (!isseqaij) { 6131 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6132 if (matis->A == pcbddc->local_mat) { 6133 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6134 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6135 } else { 6136 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6137 } 6138 } 6139 if (!pcbddc->benign_change_explicit) { 6140 benign_n = pcbddc->benign_n; 6141 } else { 6142 benign_n = 0; 6143 } 6144 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6145 } 6146 6147 /* free adjacency */ 6148 if (free_used_adj) { 6149 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6150 } 6151 PetscFunctionReturn(0); 6152 } 6153 6154 #undef __FUNCT__ 6155 #define __FUNCT__ "PCBDDCInitSubSchurs" 6156 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6157 { 6158 PC_IS *pcis=(PC_IS*)pc->data; 6159 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6160 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6161 PCBDDCGraph graph; 6162 PetscErrorCode ierr; 6163 6164 PetscFunctionBegin; 6165 /* attach interface graph for determining subsets */ 6166 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6167 IS verticesIS,verticescomm; 6168 PetscInt vsize,*idxs; 6169 6170 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6171 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6172 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6173 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6174 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6175 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6176 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6177 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6178 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6179 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6180 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6181 /* 6182 if (pcbddc->dbg_flag) { 6183 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6184 } 6185 */ 6186 } else { 6187 graph = pcbddc->mat_graph; 6188 } 6189 6190 /* sub_schurs init */ 6191 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6192 6193 /* free graph struct */ 6194 if (pcbddc->sub_schurs_rebuild) { 6195 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6196 } 6197 PetscFunctionReturn(0); 6198 } 6199 6200 #undef __FUNCT__ 6201 #define __FUNCT__ "PCBDDCCheckOperator" 6202 PetscErrorCode PCBDDCCheckOperator(PC pc) 6203 { 6204 PC_IS *pcis=(PC_IS*)pc->data; 6205 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6206 PetscErrorCode ierr; 6207 6208 PetscFunctionBegin; 6209 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6210 IS zerodiag = NULL; 6211 Mat S_j,B0_B=NULL; 6212 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6213 PetscScalar *p0_check,*array,*array2; 6214 PetscReal norm; 6215 PetscInt i; 6216 6217 /* B0 and B0_B */ 6218 if (zerodiag) { 6219 IS dummy; 6220 6221 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6222 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6223 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6224 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6225 } 6226 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6227 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6228 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6229 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6230 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6231 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6232 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6233 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6234 /* S_j */ 6235 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6236 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6237 6238 /* mimic vector in \widetilde{W}_\Gamma */ 6239 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6240 /* continuous in primal space */ 6241 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6242 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6244 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6245 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6246 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6247 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6248 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6249 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6250 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6251 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6252 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6253 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6254 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6255 6256 /* assemble rhs for coarse problem */ 6257 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6258 /* local with Schur */ 6259 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6260 if (zerodiag) { 6261 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6262 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6263 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6264 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6265 } 6266 /* sum on primal nodes the local contributions */ 6267 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6268 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6269 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6270 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6271 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6272 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6273 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6274 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6275 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6276 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6277 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6278 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6279 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6280 /* scale primal nodes (BDDC sums contibutions) */ 6281 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6282 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6283 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6284 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6285 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6286 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6287 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6288 /* global: \widetilde{B0}_B w_\Gamma */ 6289 if (zerodiag) { 6290 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6291 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6292 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6293 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6294 } 6295 /* BDDC */ 6296 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6297 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6298 6299 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6300 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6301 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6302 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6303 for (i=0;i<pcbddc->benign_n;i++) { 6304 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6305 } 6306 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6307 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6308 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6309 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6310 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6311 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6312 } 6313 PetscFunctionReturn(0); 6314 } 6315