1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseMumps reuse = pcbddc->sub_schurs->reuse_mumps; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 290 if (!isseqaij && filter) { 291 PetscBool isseqdense; 292 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 294 if (!isseqdense) { 295 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 296 } else { /* TODO: rectangular case and LDA */ 297 PetscScalar *array; 298 PetscReal chop=1.e-6; 299 300 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 301 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 302 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 303 for (i=0;i<n;i++) { 304 PetscInt j; 305 for (j=i+1;j<n;j++) { 306 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 307 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 308 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 309 } 310 } 311 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 312 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 313 } 314 } else { 315 B = A; 316 } 317 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 318 319 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 320 if (filter) { 321 PetscScalar *data; 322 PetscInt j,cum; 323 324 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 325 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 326 cum = 0; 327 for (i=0;i<n;i++) { 328 PetscInt t; 329 330 for (j=xadj[i];j<xadj[i+1];j++) { 331 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 332 continue; 333 } 334 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 335 } 336 t = xadj_filtered[i]; 337 xadj_filtered[i] = cum; 338 cum += t; 339 } 340 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 341 } else { 342 xadj_filtered = NULL; 343 adjncy_filtered = NULL; 344 } 345 346 /* compute local connected components using PCBDDCGraph */ 347 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 349 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 350 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 351 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 353 if (xadj_filtered) { 354 graph->xadj = xadj_filtered; 355 graph->adjncy = adjncy_filtered; 356 } else { 357 graph->xadj = xadj; 358 graph->adjncy = adjncy; 359 } 360 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 361 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 362 /* partial clean up */ 363 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 364 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 365 if (A != B) { 366 ierr = MatDestroy(&B);CHKERRQ(ierr); 367 } 368 369 /* get back data */ 370 if (ncc) *ncc = graph->ncc; 371 if (cc) { 372 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 373 for (i=0;i<graph->ncc;i++) { 374 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 375 } 376 *cc = cc_n; 377 } 378 /* clean up graph */ 379 graph->xadj = 0; 380 graph->adjncy = 0; 381 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCBenignCheck" 387 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 388 { 389 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 390 PC_IS* pcis = (PC_IS*)(pc->data); 391 IS dirIS = NULL; 392 PetscInt i; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 397 if (zerodiag) { 398 Mat A; 399 Vec vec3_N; 400 PetscScalar *vals; 401 const PetscInt *idxs; 402 PetscInt nz; 403 404 /* p0 */ 405 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 406 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 407 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 408 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 409 for (i=0;i<nz;i++) vals[i] = 1.; 410 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 412 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 413 /* v_I */ 414 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 415 for (i=0;i<nz;i++) vals[i] = 0.; 416 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 417 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 419 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 if (dirIS) { 423 PetscInt n; 424 425 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 426 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 427 for (i=0;i<n;i++) vals[i] = 0.; 428 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 430 } 431 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 433 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 434 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 435 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 436 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 437 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 438 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 439 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 440 } 441 ierr = PetscFree(vals);CHKERRQ(ierr); 442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 443 } 444 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 445 446 /* check PCBDDCBenignGetOrSetP0 */ 447 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 448 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 449 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 450 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 451 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 452 for (i=0;i<pcbddc->benign_n;i++) { 453 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 454 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 455 } 456 } 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 462 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 463 { 464 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 465 IS pressures,zerodiag,*zerodiag_subs; 466 PetscInt nz,n; 467 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 472 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 473 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 474 for (n=0;n<pcbddc->benign_n;n++) { 475 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 476 } 477 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 478 pcbddc->benign_n = 0; 479 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 480 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 481 Checks if all the pressure dofs in each subdomain have a zero diagonal 482 If not, a change of basis on pressures is not needed 483 since the local Schur complements are SPD 484 */ 485 has_null_pressures = PETSC_TRUE; 486 have_null = PETSC_TRUE; 487 if (pcbddc->n_ISForDofsLocal) { 488 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 489 490 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 491 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 492 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 493 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 494 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 495 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 496 if (!sorted) { 497 ierr = ISSort(pressures);CHKERRQ(ierr); 498 } 499 } else { 500 pressures = NULL; 501 } 502 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 503 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 504 /* TODO: add check for shared dofs and raise error */ 505 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 506 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 507 if (!sorted) { 508 ierr = ISSort(zerodiag);CHKERRQ(ierr); 509 } 510 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 511 if (!nz) { 512 if (n) have_null = PETSC_FALSE; 513 has_null_pressures = PETSC_FALSE; 514 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 515 } 516 recompute_zerodiag = PETSC_FALSE; 517 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 518 zerodiag_subs = NULL; 519 pcbddc->benign_n = 0; 520 if (has_null_pressures) { 521 IS *subs; 522 PetscInt nsubs,i; 523 524 subs = pcbddc->local_subs; 525 nsubs = pcbddc->n_local_subs; 526 if (nsubs > 1) { 527 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 528 for (i=0;i<nsubs;i++) { 529 ISLocalToGlobalMapping l2g; 530 IS t_zerodiag_subs; 531 PetscInt nl; 532 533 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 534 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 535 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 536 if (nl) { 537 PetscBool valid = PETSC_TRUE; 538 539 if (pressures) { 540 IS t_pressure_subs; 541 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 542 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 543 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 544 } 545 if (valid) { 546 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 547 pcbddc->benign_n++; 548 } else { 549 recompute_zerodiag = PETSC_TRUE; 550 } 551 } 552 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 553 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 554 } 555 } else { /* there's just one subdomain (or zero if they have not been detected */ 556 PetscBool valid = PETSC_TRUE; 557 if (pressures) { 558 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 559 } 560 if (valid) { 561 pcbddc->benign_n = 1; 562 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 563 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 564 zerodiag_subs[0] = zerodiag; 565 } 566 } 567 } 568 569 if (!pcbddc->benign_n) { 570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 571 recompute_zerodiag = PETSC_FALSE; 572 has_null_pressures = PETSC_FALSE; 573 have_null = PETSC_FALSE; 574 } 575 576 /* final check for null pressures */ 577 if (zerodiag && pressures) { 578 PetscInt nz,np; 579 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 580 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 581 if (nz != np) have_null = PETSC_FALSE; 582 } 583 584 if (recompute_zerodiag) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 if (pcbddc->benign_n == 1) { 587 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 588 zerodiag = zerodiag_subs[0]; 589 } else { 590 PetscInt i,nzn,*new_idxs; 591 592 nzn = 0; 593 for (i=0;i<pcbddc->benign_n;i++) { 594 PetscInt ns; 595 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 596 nzn += ns; 597 } 598 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 599 nzn = 0; 600 for (i=0;i<pcbddc->benign_n;i++) { 601 PetscInt ns,*idxs; 602 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 603 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 604 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 605 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 606 nzn += ns; 607 } 608 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 609 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 610 } 611 have_null = PETSC_FALSE; 612 } 613 614 if (has_null_pressures) { 615 IS zerodiagc; 616 const PetscInt *idxs,*idxsc; 617 PetscInt i,s,*nnz; 618 619 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 620 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 622 /* local change of basis for pressures */ 623 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 624 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 625 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 626 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 627 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 628 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 629 for (i=0;i<pcbddc->benign_n;i++) { 630 PetscInt nzs,j; 631 632 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 633 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 634 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 635 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 636 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 637 } 638 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 639 ierr = PetscFree(nnz);CHKERRQ(ierr); 640 /* set identity on velocities */ 641 for (i=0;i<n-nz;i++) { 642 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 643 } 644 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 645 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 646 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 647 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 648 /* set change on pressures */ 649 for (s=0;s<pcbddc->benign_n;s++) { 650 PetscScalar *array; 651 PetscInt nzs; 652 653 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 654 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 655 for (i=0;i<nzs-1;i++) { 656 PetscScalar vals[2]; 657 PetscInt cols[2]; 658 659 cols[0] = idxs[i]; 660 cols[1] = idxs[nzs-1]; 661 vals[0] = 1.; 662 vals[1] = 1.; 663 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 664 } 665 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 666 for (i=0;i<nzs-1;i++) array[i] = -1.; 667 array[nzs-1] = 1.; 668 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 669 /* store local idxs for p0 */ 670 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 671 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 672 ierr = PetscFree(array);CHKERRQ(ierr); 673 } 674 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 675 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 676 /* project if needed */ 677 if (pcbddc->benign_change_explicit) { 678 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 679 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 680 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 681 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 682 } 683 /* store global idxs for p0 */ 684 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 685 } 686 pcbddc->benign_zerodiag_subs = zerodiag_subs; 687 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 688 689 /* determines if the coarse solver will be singular or not */ 690 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 691 *zerodiaglocal = zerodiag; 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 697 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 698 { 699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 if (!pcbddc->benign_sf) { 704 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 705 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 706 } 707 if (get) { /* use SF to get values */ 708 PetscScalar *array; 709 710 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 711 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 712 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 713 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 714 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 715 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 716 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 717 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 724 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 725 { 726 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 /* TODO: add error checking 731 - avoid nested pop (or push) calls. 732 - cannot push before pop. 733 - cannot call this if pcbddc->local_mat is NULL 734 */ 735 if (!pcbddc->benign_n) { 736 PetscFunctionReturn(0); 737 } 738 if (pop) { 739 if (pcbddc->benign_change_explicit) { 740 IS is_p0; 741 MatReuse reuse; 742 743 /* extract B_0 */ 744 reuse = MAT_INITIAL_MATRIX; 745 if (pcbddc->benign_B0) { 746 reuse = MAT_REUSE_MATRIX; 747 } 748 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 749 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 750 /* remove rows and cols from local problem */ 751 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 752 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 753 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 754 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 755 } else { 756 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 757 PetscScalar *vals; 758 PetscInt i,n,*idxs_ins; 759 760 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 761 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 762 if (!pcbddc->benign_B0) { 763 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 764 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 765 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 766 /* this matrix is very sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 767 Setting nnz=100 should be more than enough */ 768 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 769 } 770 771 for (i=0;i<pcbddc->benign_n;i++) { 772 PetscScalar *array; 773 PetscInt *idxs,j,nz,cum; 774 775 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 776 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 777 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 778 for (j=0;j<nz;j++) vals[j] = 1.; 779 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 780 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 781 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 782 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 783 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 784 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 785 cum = 0; 786 for (j=0;j<n;j++) { 787 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 788 vals[cum] = array[j]; 789 idxs_ins[cum] = j; 790 cum++; 791 } 792 } 793 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 794 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 796 } 797 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 798 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 799 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 800 } 801 } else { /* push */ 802 if (pcbddc->benign_change_explicit) { 803 PetscInt i; 804 805 for (i=0;i<pcbddc->benign_n;i++) { 806 PetscScalar *B0_vals; 807 PetscInt *B0_cols,B0_ncol; 808 809 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 810 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 811 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 812 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 813 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 814 } 815 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 816 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817 } else { 818 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 819 } 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCBDDCAdaptiveSelection" 826 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 827 { 828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 829 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 830 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 831 PetscBLASInt *B_iwork,*B_ifail; 832 PetscScalar *work,lwork; 833 PetscScalar *St,*S,*eigv; 834 PetscScalar *Sarray,*Starray; 835 PetscReal *eigs,thresh; 836 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 837 PetscBool allocated_S_St; 838 #if defined(PETSC_USE_COMPLEX) 839 PetscReal *rwork; 840 #endif 841 PetscErrorCode ierr; 842 843 PetscFunctionBegin; 844 if (!sub_schurs->use_mumps) { 845 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 846 } 847 848 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 849 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 850 } 851 852 if (pcbddc->dbg_flag) { 853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 854 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 855 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 856 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 857 } 858 859 if (pcbddc->dbg_flag) { 860 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 861 } 862 863 /* max size of subsets */ 864 mss = 0; 865 for (i=0;i<sub_schurs->n_subs;i++) { 866 PetscInt subset_size; 867 868 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 869 mss = PetscMax(mss,subset_size); 870 } 871 872 /* min/max and threshold */ 873 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 874 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 875 nmax = PetscMax(nmin,nmax); 876 allocated_S_St = PETSC_FALSE; 877 if (nmin) { 878 allocated_S_St = PETSC_TRUE; 879 } 880 881 /* allocate lapack workspace */ 882 cum = cum2 = 0; 883 maxneigs = 0; 884 for (i=0;i<sub_schurs->n_subs;i++) { 885 PetscInt n,subset_size; 886 887 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 888 n = PetscMin(subset_size,nmax); 889 cum += subset_size; 890 cum2 += subset_size*n; 891 maxneigs = PetscMax(maxneigs,n); 892 } 893 if (mss) { 894 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 895 PetscBLASInt B_itype = 1; 896 PetscBLASInt B_N = mss; 897 PetscReal zero = 0.0; 898 PetscReal eps = 0.0; /* dlamch? */ 899 900 B_lwork = -1; 901 S = NULL; 902 St = NULL; 903 eigs = NULL; 904 eigv = NULL; 905 B_iwork = NULL; 906 B_ifail = NULL; 907 #if defined(PETSC_USE_COMPLEX) 908 rwork = NULL; 909 #endif 910 thresh = 1.0; 911 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 912 #if defined(PETSC_USE_COMPLEX) 913 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 914 #else 915 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 916 #endif 917 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 919 } else { 920 /* TODO */ 921 } 922 } else { 923 lwork = 0; 924 } 925 926 nv = 0; 927 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 928 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 929 } 930 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 931 if (allocated_S_St) { 932 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 933 } 934 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 935 #if defined(PETSC_USE_COMPLEX) 936 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 937 #endif 938 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 939 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 940 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 941 nv+cum,&pcbddc->adaptive_constraints_idxs, 942 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 943 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 944 945 maxneigs = 0; 946 cum = cum2 = cumarray = 0; 947 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 948 pcbddc->adaptive_constraints_data_ptr[0] = 0; 949 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 950 const PetscInt *idxs; 951 952 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 953 for (cum=0;cum<nv;cum++) { 954 pcbddc->adaptive_constraints_n[cum] = 1; 955 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 956 pcbddc->adaptive_constraints_data[cum] = 1.0; 957 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 958 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 959 } 960 cum2 = cum; 961 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 962 } 963 964 if (mss) { /* multilevel */ 965 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 966 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 967 } 968 969 thresh = pcbddc->adaptive_threshold; 970 for (i=0;i<sub_schurs->n_subs;i++) { 971 const PetscInt *idxs; 972 PetscReal infty = PETSC_MAX_REAL; 973 PetscInt j,subset_size,eigs_start = 0; 974 PetscBLASInt B_N; 975 PetscBool same_data = PETSC_FALSE; 976 977 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 978 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 979 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 980 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 981 if (sub_schurs->is_hermitian) { 982 PetscInt j,k; 983 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 984 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 985 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 986 } 987 for (j=0;j<subset_size;j++) { 988 for (k=j;k<subset_size;k++) { 989 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 990 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 991 } 992 } 993 } else { 994 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 995 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 996 } 997 } else { 998 S = Sarray + cumarray; 999 St = Starray + cumarray; 1000 } 1001 1002 /* see if we can save some work */ 1003 if (sub_schurs->n_subs == 1) { 1004 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1005 } 1006 1007 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1008 B_neigs = 0; 1009 } else { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_IL, B_IU; 1013 PetscReal eps = -1.0; /* dlamch? */ 1014 PetscInt nmin_s; 1015 1016 if (pcbddc->dbg_flag) { 1017 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1018 } 1019 1020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1021 if (thresh > 1.+PETSC_SMALL) { 1022 1023 /* ask for eigenvalues larger than thresh */ 1024 #if defined(PETSC_USE_COMPLEX) 1025 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1026 #else 1027 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1028 #endif 1029 } else { 1030 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1031 B_IL = 1; 1032 #if defined(PETSC_USE_COMPLEX) 1033 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1034 #else 1035 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1036 #endif 1037 } 1038 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1039 if (B_ierr) { 1040 if (B_ierr < 0 ) { 1041 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1042 } else if (B_ierr <= B_N) { 1043 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1044 } else { 1045 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1046 } 1047 } 1048 1049 if (B_neigs > nmax) { 1050 if (pcbddc->dbg_flag) { 1051 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1052 } 1053 eigs_start = B_neigs -nmax; 1054 B_neigs = nmax; 1055 } 1056 1057 nmin_s = PetscMin(nmin,B_N); 1058 if (B_neigs < nmin_s) { 1059 PetscBLASInt B_neigs2; 1060 1061 B_IU = B_N - B_neigs; 1062 B_IL = B_N - nmin_s + 1; 1063 if (pcbddc->dbg_flag) { 1064 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1065 } 1066 if (sub_schurs->is_hermitian) { 1067 PetscInt j; 1068 for (j=0;j<subset_size;j++) { 1069 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 for (j=0;j<subset_size;j++) { 1072 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1073 } 1074 } else { 1075 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1076 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1077 } 1078 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1079 #if defined(PETSC_USE_COMPLEX) 1080 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1081 #else 1082 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1083 #endif 1084 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1085 B_neigs += B_neigs2; 1086 } 1087 if (B_ierr) { 1088 if (B_ierr < 0 ) { 1089 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1090 } else if (B_ierr <= B_N) { 1091 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1092 } else { 1093 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1094 } 1095 } 1096 if (pcbddc->dbg_flag) { 1097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1098 for (j=0;j<B_neigs;j++) { 1099 if (eigs[j] == 0.0) { 1100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1101 } else { 1102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1103 } 1104 } 1105 } 1106 } else { 1107 /* TODO */ 1108 } 1109 } 1110 maxneigs = PetscMax(B_neigs,maxneigs); 1111 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1112 if (B_neigs) { 1113 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1114 1115 if (pcbddc->dbg_flag > 1) { 1116 PetscInt ii; 1117 for (ii=0;ii<B_neigs;ii++) { 1118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1119 for (j=0;j<B_N;j++) { 1120 #if defined(PETSC_USE_COMPLEX) 1121 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1122 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1123 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1124 #else 1125 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1126 #endif 1127 } 1128 } 1129 } 1130 #if 0 1131 for (j=0;j<B_neigs;j++) { 1132 PetscBLASInt Blas_N,Blas_one = 1.0; 1133 PetscScalar norm; 1134 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1135 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1136 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1137 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1138 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1139 } else { 1140 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1141 } 1142 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1143 } 1144 #endif 1145 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1146 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1147 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1148 cum++; 1149 } 1150 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1151 /* shift for next computation */ 1152 cumarray += subset_size*subset_size; 1153 } 1154 if (pcbddc->dbg_flag) { 1155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1156 } 1157 1158 if (mss) { 1159 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1160 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1161 /* destroy matrices (junk) */ 1162 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1163 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1164 } 1165 if (allocated_S_St) { 1166 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1167 } 1168 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1169 #if defined(PETSC_USE_COMPLEX) 1170 ierr = PetscFree(rwork);CHKERRQ(ierr); 1171 #endif 1172 if (pcbddc->dbg_flag) { 1173 PetscInt maxneigs_r; 1174 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1175 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "PCBDDCSetUpSolvers" 1182 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1183 { 1184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1185 PetscScalar *coarse_submat_vals; 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1190 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1191 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1192 1193 /* Setup local neumann solver ksp_R */ 1194 /* PCBDDCSetUpLocalScatters should be called first! */ 1195 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1196 1197 /* Change global null space passed in by the user if change of basis has been requested */ 1198 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1199 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1200 } 1201 1202 /* 1203 Setup local correction and local part of coarse basis. 1204 Gives back the dense local part of the coarse matrix in column major ordering 1205 */ 1206 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1207 1208 /* Compute total number of coarse nodes and setup coarse solver */ 1209 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1210 1211 /* free */ 1212 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "PCBDDCResetCustomization" 1218 PetscErrorCode PCBDDCResetCustomization(PC pc) 1219 { 1220 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1225 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1226 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1227 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1228 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1229 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1231 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1232 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1233 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1234 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1235 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "PCBDDCResetTopography" 1241 PetscErrorCode PCBDDCResetTopography(PC pc) 1242 { 1243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1244 PetscInt i; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1249 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1250 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1251 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1252 for (i=0;i<pcbddc->n_local_subs;i++) { 1253 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1254 } 1255 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1256 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "PCBDDCResetSolvers" 1262 PetscErrorCode PCBDDCResetSolvers(PC pc) 1263 { 1264 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1269 if (pcbddc->coarse_phi_B) { 1270 PetscScalar *array; 1271 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1272 ierr = PetscFree(array);CHKERRQ(ierr); 1273 } 1274 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1275 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1276 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1277 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1278 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1279 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1280 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1281 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1282 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1283 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1284 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1285 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1286 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1287 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1288 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1289 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1290 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1291 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1292 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1293 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1294 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1295 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1296 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1297 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1298 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1299 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1300 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1301 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1302 if (pcbddc->benign_zerodiag_subs) { 1303 PetscInt i; 1304 for (i=0;i<pcbddc->benign_n;i++) { 1305 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1306 } 1307 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1308 } 1309 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1315 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1316 { 1317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1318 PC_IS *pcis = (PC_IS*)pc->data; 1319 VecType impVecType; 1320 PetscInt n_constraints,n_R,old_size; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 if (!pcbddc->ConstraintMatrix) { 1325 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1326 } 1327 /* get sizes */ 1328 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1329 n_R = pcis->n - pcbddc->n_vertices; 1330 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1331 /* local work vectors (try to avoid unneeded work)*/ 1332 /* R nodes */ 1333 old_size = -1; 1334 if (pcbddc->vec1_R) { 1335 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1336 } 1337 if (n_R != old_size) { 1338 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1339 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1340 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1341 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1342 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1343 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1344 } 1345 /* local primal dofs */ 1346 old_size = -1; 1347 if (pcbddc->vec1_P) { 1348 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1349 } 1350 if (pcbddc->local_primal_size != old_size) { 1351 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1352 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1353 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1354 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1355 } 1356 /* local explicit constraints */ 1357 old_size = -1; 1358 if (pcbddc->vec1_C) { 1359 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1360 } 1361 if (n_constraints && n_constraints != old_size) { 1362 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1363 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1364 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1365 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "PCBDDCSetUpCorrection" 1372 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1373 { 1374 PetscErrorCode ierr; 1375 /* pointers to pcis and pcbddc */ 1376 PC_IS* pcis = (PC_IS*)pc->data; 1377 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1378 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1379 /* submatrices of local problem */ 1380 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1381 /* submatrices of benign trick */ 1382 Mat B0_V = NULL; 1383 /* submatrices of local coarse problem */ 1384 Mat S_VV,S_CV,S_VC,S_CC; 1385 /* working matrices */ 1386 Mat C_CR; 1387 /* additional working stuff */ 1388 PC pc_R; 1389 Mat F; 1390 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1391 1392 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1393 PetscScalar *work; 1394 PetscInt *idx_V_B; 1395 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1396 PetscInt i,n_R,n_D,n_B; 1397 1398 /* some shortcuts to scalars */ 1399 PetscScalar one=1.0,m_one=-1.0; 1400 1401 PetscFunctionBegin; 1402 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1403 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1404 } 1405 1406 /* Set Non-overlapping dimensions */ 1407 n_vertices = pcbddc->n_vertices; 1408 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1409 n_B = pcis->n_B; 1410 n_D = pcis->n - n_B; 1411 n_R = pcis->n - n_vertices; 1412 1413 /* vertices in boundary numbering */ 1414 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1415 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1416 if (i != n_vertices) { 1417 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1418 } 1419 1420 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1421 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1422 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1423 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1424 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1425 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1427 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1428 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1429 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1430 1431 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1432 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1436 lda_rhs = n_R; 1437 need_benign_correction = PETSC_FALSE; 1438 if (isLU || isILU || isCHOL) { 1439 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1440 } else if (sub_schurs->reuse_mumps) { 1441 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1442 MatFactorType type; 1443 1444 F = reuse_mumps->F; 1445 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1446 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1447 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1448 need_benign_correction = !!reuse_mumps->benign_n; 1449 } else { 1450 F = NULL; 1451 } 1452 1453 /* allocate workspace */ 1454 n = 0; 1455 if (n_constraints) { 1456 n += lda_rhs*n_constraints; 1457 } 1458 if (n_vertices) { 1459 n = PetscMax(2*lda_rhs*n_vertices,n); 1460 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1461 } 1462 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1463 1464 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1465 if (n_constraints) { 1466 Mat M1,M2,M3; 1467 Mat auxmat; 1468 IS is_aux; 1469 PetscScalar *array,*array2; 1470 1471 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1472 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1473 1474 /* Extract constraints on R nodes: C_{CR} */ 1475 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1476 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1477 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1478 1479 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1480 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1481 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1482 for (i=0;i<n_constraints;i++) { 1483 const PetscScalar *row_cmat_values; 1484 const PetscInt *row_cmat_indices; 1485 PetscInt size_of_constraint,j; 1486 1487 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1488 for (j=0;j<size_of_constraint;j++) { 1489 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1490 } 1491 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1492 } 1493 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1494 if (F) { 1495 Mat B; 1496 1497 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1498 if (need_benign_correction) { 1499 PetscScalar *marr; 1500 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1501 1502 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1503 for (i=0;i<n_constraints;i++) { 1504 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1505 ierr = PCBDDCReuseSolversChangeInterior(reuse_mumps,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1506 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1507 } 1508 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1509 } 1510 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1511 if (need_benign_correction) { 1512 PetscScalar *marr; 1513 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1514 1515 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1516 for (i=0;i<n_constraints;i++) { 1517 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1518 ierr = PCBDDCReuseSolversChangeInterior(reuse_mumps,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1519 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1520 } 1521 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1522 } 1523 ierr = MatDestroy(&B);CHKERRQ(ierr); 1524 } else { 1525 PetscScalar *marr; 1526 1527 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1528 for (i=0;i<n_constraints;i++) { 1529 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1530 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1531 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1532 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1533 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1534 } 1535 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1536 } 1537 if (!pcbddc->switch_static) { 1538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1539 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1540 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1541 for (i=0;i<n_constraints;i++) { 1542 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1543 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1544 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1547 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1548 } 1549 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1550 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1551 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1552 } else { 1553 if (lda_rhs != n_R) { 1554 IS dummy; 1555 1556 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1557 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1558 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1559 } else { 1560 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1561 pcbddc->local_auxmat2 = local_auxmat2_R; 1562 } 1563 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1564 } 1565 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1566 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1567 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1568 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1569 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1570 if (isCHOL) { 1571 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1574 } 1575 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1576 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1577 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1578 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1579 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1580 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1581 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1582 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1583 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1584 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1585 } 1586 /* Get submatrices from subdomain matrix */ 1587 if (pcbddc->benign_n) { 1588 IS dummy; 1589 Mat B0_R; 1590 PetscReal norm; 1591 1592 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1593 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1594 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1595 if (norm > PETSC_SMALL) { 1596 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1597 } 1598 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1599 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1600 } 1601 1602 if (n_vertices) { 1603 IS is_aux; 1604 1605 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1606 IS tis; 1607 1608 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1609 ierr = ISSort(tis);CHKERRQ(ierr); 1610 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1611 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1612 } else { 1613 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1614 } 1615 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 1616 ISLocalToGlobalMapping RtoN; 1617 Mat B,A_RVt; 1618 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1619 PetscInt *idxs_p0,n; 1620 1621 ierr = MatGetSubMatrix(matis->A,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr); 1622 ierr = MatGetSubMatrix(pcbddc->benign_change,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1623 ierr = MatTranspose(B,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 1624 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1625 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1626 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1627 if (n != pcbddc->benign_n) { 1628 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1629 } 1630 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1631 ierr = MatZeroRows(B,pcbddc->benign_n,idxs_p0,0.,NULL,NULL);CHKERRQ(ierr); 1632 ierr = PetscFree(idxs_p0);CHKERRQ(ierr); 1633 ierr = MatMatMult(B,A_RVt,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&A_RV);CHKERRQ(ierr); 1634 ierr = MatDestroy(&B);CHKERRQ(ierr); 1635 ierr = MatDestroy(&A_RVt);CHKERRQ(ierr); 1636 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1637 ierr = MatGetSubMatrix(matis->A,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1638 } else { 1639 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1640 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1641 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1642 } 1643 if (pcbddc->benign_n) { 1644 IS dummy; 1645 1646 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1647 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1648 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1649 } 1650 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1651 } 1652 1653 /* Matrix of coarse basis functions (local) */ 1654 if (pcbddc->coarse_phi_B) { 1655 PetscInt on_B,on_primal,on_D=n_D; 1656 if (pcbddc->coarse_phi_D) { 1657 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1658 } 1659 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1660 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1661 PetscScalar *marray; 1662 1663 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1664 ierr = PetscFree(marray);CHKERRQ(ierr); 1665 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1666 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1667 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1668 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1669 } 1670 } 1671 1672 if (!pcbddc->coarse_phi_B) { 1673 PetscScalar *marray; 1674 1675 n = n_B*pcbddc->local_primal_size; 1676 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1677 n += n_D*pcbddc->local_primal_size; 1678 } 1679 if (!pcbddc->symmetric_primal) { 1680 n *= 2; 1681 } 1682 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1683 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1684 n = n_B*pcbddc->local_primal_size; 1685 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1686 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1687 n += n_D*pcbddc->local_primal_size; 1688 } 1689 if (!pcbddc->symmetric_primal) { 1690 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n = n_B*pcbddc->local_primal_size; 1693 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1694 } 1695 } else { 1696 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1697 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1698 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1699 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1700 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1701 } 1702 } 1703 } 1704 1705 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1706 p0_lidx_I = NULL; 1707 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1708 const PetscInt *idxs; 1709 1710 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1711 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->benign_n;i++) { 1713 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1714 } 1715 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1716 } 1717 1718 /* vertices */ 1719 if (n_vertices) { 1720 1721 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1722 1723 if (n_R) { 1724 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1725 PetscBLASInt B_N,B_one = 1; 1726 PetscScalar *x,*y; 1727 PetscBool isseqaij; 1728 1729 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1730 if (lda_rhs == n_R) { 1731 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1732 } else { 1733 PetscScalar *av,*array; 1734 const PetscInt *xadj,*adjncy; 1735 PetscInt n; 1736 PetscBool flg_row; 1737 1738 array = work+lda_rhs*n_vertices; 1739 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1740 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1741 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1742 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1743 for (i=0;i<n;i++) { 1744 PetscInt j; 1745 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1746 } 1747 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1748 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1750 } 1751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1752 if (F) { /* TODO could be optimized for symmetric problems */ 1753 if (need_benign_correction) { 1754 PetscScalar *marr; 1755 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1756 1757 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1758 for (i=0;i<n_vertices;i++) { 1759 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1760 ierr = PCBDDCReuseSolversChangeInterior(reuse_mumps,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1761 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1762 } 1763 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1764 } 1765 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1766 if (need_benign_correction) { 1767 PetscScalar *marr; 1768 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1769 1770 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1771 for (i=0;i<n_vertices;i++) { 1772 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1773 ierr = PCBDDCReuseSolversChangeInterior(reuse_mumps,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1774 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1775 } 1776 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1777 } 1778 } else { 1779 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1780 for (i=0;i<n_vertices;i++) { 1781 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1782 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1783 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1784 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1785 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1786 } 1787 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1788 } 1789 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1790 /* S_VV and S_CV */ 1791 if (n_constraints) { 1792 Mat B; 1793 1794 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1795 for (i=0;i<n_vertices;i++) { 1796 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1797 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1798 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1801 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1802 } 1803 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1804 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1805 ierr = MatDestroy(&B);CHKERRQ(ierr); 1806 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1807 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1808 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1809 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1810 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1811 ierr = MatDestroy(&B);CHKERRQ(ierr); 1812 } 1813 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1814 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1815 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1816 } 1817 if (lda_rhs != n_R) { 1818 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1820 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1821 } 1822 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1823 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1824 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1825 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1826 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1828 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1829 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1830 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1831 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1832 } else { 1833 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1834 } 1835 for (i=0;i<pcbddc->benign_n;i++) { 1836 const PetscScalar *vals; 1837 const PetscInt *idxs; 1838 PetscInt n,j,primal_idx; 1839 1840 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1841 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1842 for (j=0;j<n;j++) { 1843 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1844 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1845 } 1846 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1847 } 1848 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1849 1850 /* coarse basis functions */ 1851 for (i=0;i<n_vertices;i++) { 1852 PetscScalar *y; 1853 1854 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1856 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 y[n_B*i+idx_V_B[i]] = 1.0; 1860 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1861 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1862 1863 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1864 PetscInt j; 1865 1866 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1867 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1871 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1872 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1873 } 1874 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1875 } 1876 /* if n_R == 0 the object is not destroyed */ 1877 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1878 } 1879 1880 if (n_constraints) { 1881 Mat B; 1882 1883 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1885 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1886 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1887 if (n_vertices) { 1888 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1889 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1890 } else { 1891 Mat S_VCt; 1892 1893 if (lda_rhs != n_R) { 1894 ierr = MatDestroy(&B);CHKERRQ(ierr); 1895 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1896 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1897 } 1898 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1899 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1900 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1901 } 1902 } 1903 ierr = MatDestroy(&B);CHKERRQ(ierr); 1904 /* coarse basis functions */ 1905 for (i=0;i<n_constraints;i++) { 1906 PetscScalar *y; 1907 1908 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1909 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1910 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1911 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1912 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1914 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1915 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1916 PetscInt j; 1917 1918 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1919 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1920 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1921 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1923 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1925 } 1926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1927 } 1928 } 1929 if (n_constraints) { 1930 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1931 } 1932 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1933 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1934 1935 /* compute other basis functions for non-symmetric problems */ 1936 if (!pcbddc->symmetric_primal) { 1937 Mat B_V=NULL,B_C=NULL; 1938 PetscScalar *marray; 1939 1940 if (n_constraints) { 1941 Mat S_CCT,C_CRT; 1942 1943 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1944 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1945 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1946 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1947 if (n_vertices) { 1948 Mat S_VCT; 1949 1950 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1951 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1952 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1953 } 1954 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1955 } 1956 if (n_vertices && n_R) { 1957 PetscScalar *av,*marray; 1958 const PetscInt *xadj,*adjncy; 1959 PetscInt n; 1960 PetscBool flg_row; 1961 1962 /* B_V = B_V - A_VR^T */ 1963 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1964 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1965 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1966 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1967 for (i=0;i<n;i++) { 1968 PetscInt j; 1969 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1970 } 1971 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1972 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1973 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1974 } 1975 1976 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1977 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1978 for (i=0;i<n_vertices;i++) { 1979 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1980 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1981 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1982 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1983 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1984 } 1985 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1986 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1987 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1988 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1989 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1990 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1991 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1992 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1993 } 1994 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<pcbddc->local_primal_size;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 if (i<n_vertices) { 2005 y[n_B*i+idx_V_B[i]] = 1.0; 2006 } 2007 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2009 2010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2011 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2016 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2017 } 2018 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2019 } 2020 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2021 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2022 } 2023 /* free memory */ 2024 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2025 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2026 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2027 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2028 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2029 ierr = PetscFree(work);CHKERRQ(ierr); 2030 if (n_vertices) { 2031 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2032 } 2033 if (n_constraints) { 2034 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2035 } 2036 /* Checking coarse_sub_mat and coarse basis functios */ 2037 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2038 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2039 if (pcbddc->dbg_flag) { 2040 Mat coarse_sub_mat; 2041 Mat AUXMAT,TM1,TM2,TM3,TM4; 2042 Mat coarse_phi_D,coarse_phi_B; 2043 Mat coarse_psi_D,coarse_psi_B; 2044 Mat A_II,A_BB,A_IB,A_BI; 2045 Mat C_B,CPHI; 2046 IS is_dummy; 2047 Vec mones; 2048 MatType checkmattype=MATSEQAIJ; 2049 PetscReal real_value; 2050 2051 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2052 Mat A; 2053 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2054 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2055 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2056 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2057 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2058 ierr = MatDestroy(&A);CHKERRQ(ierr); 2059 } else { 2060 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2061 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2062 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2063 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2064 } 2065 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2066 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2067 if (!pcbddc->symmetric_primal) { 2068 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2069 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2070 } 2071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2072 2073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2074 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2075 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2076 if (!pcbddc->symmetric_primal) { 2077 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2078 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2080 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2081 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2083 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2084 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2085 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2086 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2087 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2088 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2089 } else { 2090 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2091 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2092 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2093 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2094 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2095 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2096 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2097 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2098 } 2099 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2100 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2101 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2102 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2103 if (pcbddc->benign_n) { 2104 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2105 PetscScalar *data,*data2; 2106 PetscInt j; 2107 2108 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2109 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2110 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2111 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2112 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2113 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2114 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2115 for (j=0;j<pcbddc->benign_n;j++) { 2116 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2117 for (i=0;i<pcbddc->local_primal_size;i++) { 2118 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2119 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2120 } 2121 } 2122 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2123 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2124 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2125 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2126 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2127 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2128 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2129 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2130 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2131 } 2132 #if 0 2133 { 2134 PetscViewer viewer; 2135 char filename[256]; 2136 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2138 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2139 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2140 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2141 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2142 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2143 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2144 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2145 if (pcbddc->coarse_phi_D) { 2146 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2147 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2148 } 2149 if (pcbddc->coarse_psi_B) { 2150 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2151 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2152 } 2153 if (pcbddc->coarse_phi_B) { 2154 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2155 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2156 } 2157 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2158 } 2159 #endif 2160 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2161 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2164 2165 /* check constraints */ 2166 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2167 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2168 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2169 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2170 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2171 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2172 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2173 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2175 if (!pcbddc->symmetric_primal) { 2176 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2177 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2178 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2179 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2181 } 2182 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2183 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2184 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2185 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2186 } 2187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2188 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2189 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2190 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2191 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2192 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2193 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2194 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2195 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2196 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2197 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2198 if (!pcbddc->symmetric_primal) { 2199 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2200 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2201 } 2202 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2203 } 2204 /* get back data */ 2205 *coarse_submat_vals_n = coarse_submat_vals; 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2211 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2212 { 2213 Mat *work_mat; 2214 IS isrow_s,iscol_s; 2215 PetscBool rsorted,csorted; 2216 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2217 PetscErrorCode ierr; 2218 2219 PetscFunctionBegin; 2220 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2221 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2222 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2223 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2224 2225 if (!rsorted) { 2226 const PetscInt *idxs; 2227 PetscInt *idxs_sorted,i; 2228 2229 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2230 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2231 for (i=0;i<rsize;i++) { 2232 idxs_perm_r[i] = i; 2233 } 2234 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2235 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2236 for (i=0;i<rsize;i++) { 2237 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2238 } 2239 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2240 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2241 } else { 2242 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2243 isrow_s = isrow; 2244 } 2245 2246 if (!csorted) { 2247 if (isrow == iscol) { 2248 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2249 iscol_s = isrow_s; 2250 } else { 2251 const PetscInt *idxs; 2252 PetscInt *idxs_sorted,i; 2253 2254 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2255 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2256 for (i=0;i<csize;i++) { 2257 idxs_perm_c[i] = i; 2258 } 2259 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2260 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2261 for (i=0;i<csize;i++) { 2262 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2263 } 2264 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2265 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2266 } 2267 } else { 2268 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2269 iscol_s = iscol; 2270 } 2271 2272 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2273 2274 if (!rsorted || !csorted) { 2275 Mat new_mat; 2276 IS is_perm_r,is_perm_c; 2277 2278 if (!rsorted) { 2279 PetscInt *idxs_r,i; 2280 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2281 for (i=0;i<rsize;i++) { 2282 idxs_r[idxs_perm_r[i]] = i; 2283 } 2284 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2285 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2286 } else { 2287 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2288 } 2289 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2290 2291 if (!csorted) { 2292 if (isrow_s == iscol_s) { 2293 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2294 is_perm_c = is_perm_r; 2295 } else { 2296 PetscInt *idxs_c,i; 2297 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2298 for (i=0;i<csize;i++) { 2299 idxs_c[idxs_perm_c[i]] = i; 2300 } 2301 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2302 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2303 } 2304 } else { 2305 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2306 } 2307 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2308 2309 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2310 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2311 work_mat[0] = new_mat; 2312 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2313 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2314 } 2315 2316 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2317 *B = work_mat[0]; 2318 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2319 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2320 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2326 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2327 { 2328 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2329 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2330 Mat new_mat; 2331 IS is_local,is_global; 2332 PetscInt local_size; 2333 PetscBool isseqaij; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2338 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2339 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2340 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2341 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2342 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2343 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2344 2345 /* check */ 2346 if (pcbddc->dbg_flag) { 2347 Vec x,x_change; 2348 PetscReal error; 2349 2350 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2351 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2352 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2353 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2354 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2355 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2356 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2357 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2358 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2359 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2360 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2361 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2362 ierr = VecDestroy(&x);CHKERRQ(ierr); 2363 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2364 } 2365 2366 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2367 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2368 if (isseqaij) { 2369 Mat M; 2370 2371 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2372 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2373 ierr = MatDestroy(&M);CHKERRQ(ierr); 2374 } else { 2375 Mat work_mat,M; 2376 2377 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2378 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2379 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2380 ierr = MatDestroy(&M);CHKERRQ(ierr); 2381 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2382 } 2383 if (matis->A->symmetric_set) { 2384 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2385 #if !defined(PETSC_USE_COMPLEX) 2386 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2387 #endif 2388 } 2389 /* 2390 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2391 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2392 */ 2393 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2399 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2400 { 2401 PC_IS* pcis = (PC_IS*)(pc->data); 2402 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2403 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2404 PetscInt *idx_R_local=NULL; 2405 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2406 PetscInt vbs,bs; 2407 PetscBT bitmask=NULL; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 /* 2412 No need to setup local scatters if 2413 - primal space is unchanged 2414 AND 2415 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2416 AND 2417 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2418 */ 2419 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2420 PetscFunctionReturn(0); 2421 } 2422 /* destroy old objects */ 2423 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2424 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2425 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2426 /* Set Non-overlapping dimensions */ 2427 n_B = pcis->n_B; 2428 n_D = pcis->n - n_B; 2429 n_vertices = pcbddc->n_vertices; 2430 2431 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2432 2433 /* create auxiliary bitmask and allocate workspace */ 2434 if (!sub_schurs->reuse_mumps) { 2435 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2436 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2437 for (i=0;i<n_vertices;i++) { 2438 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2439 } 2440 2441 for (i=0, n_R=0; i<pcis->n; i++) { 2442 if (!PetscBTLookup(bitmask,i)) { 2443 idx_R_local[n_R++] = i; 2444 } 2445 } 2446 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2447 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2448 2449 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2450 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2451 } 2452 2453 /* Block code */ 2454 vbs = 1; 2455 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2456 if (bs>1 && !(n_vertices%bs)) { 2457 PetscBool is_blocked = PETSC_TRUE; 2458 PetscInt *vary; 2459 if (!sub_schurs->reuse_mumps) { 2460 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2461 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2462 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2463 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2464 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2465 for (i=0; i<pcis->n/bs; i++) { 2466 if (vary[i]!=0 && vary[i]!=bs) { 2467 is_blocked = PETSC_FALSE; 2468 break; 2469 } 2470 } 2471 ierr = PetscFree(vary);CHKERRQ(ierr); 2472 } else { 2473 /* Verify directly the R set */ 2474 for (i=0; i<n_R/bs; i++) { 2475 PetscInt j,node=idx_R_local[bs*i]; 2476 for (j=1; j<bs; j++) { 2477 if (node != idx_R_local[bs*i+j]-j) { 2478 is_blocked = PETSC_FALSE; 2479 break; 2480 } 2481 } 2482 } 2483 } 2484 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2485 vbs = bs; 2486 for (i=0;i<n_R/vbs;i++) { 2487 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2488 } 2489 } 2490 } 2491 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2492 if (sub_schurs->reuse_mumps) { 2493 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2494 2495 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2496 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2497 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2498 reuse_mumps->is_R = pcbddc->is_R_local; 2499 } else { 2500 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2501 } 2502 2503 /* print some info if requested */ 2504 if (pcbddc->dbg_flag) { 2505 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2506 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2507 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2511 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2512 } 2513 2514 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2515 if (!sub_schurs->reuse_mumps) { 2516 IS is_aux1,is_aux2; 2517 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2518 2519 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2520 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2521 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2522 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2523 for (i=0; i<n_D; i++) { 2524 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2525 } 2526 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2527 for (i=0, j=0; i<n_R; i++) { 2528 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2529 aux_array1[j++] = i; 2530 } 2531 } 2532 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2533 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2534 for (i=0, j=0; i<n_B; i++) { 2535 if (!PetscBTLookup(bitmask,is_indices[i])) { 2536 aux_array2[j++] = i; 2537 } 2538 } 2539 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2540 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2541 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2542 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2543 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2544 2545 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2546 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2547 for (i=0, j=0; i<n_R; i++) { 2548 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2549 aux_array1[j++] = i; 2550 } 2551 } 2552 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2553 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2554 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2555 } 2556 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2557 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2558 } else { 2559 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2560 IS tis; 2561 PetscInt schur_size; 2562 2563 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2564 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2565 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2566 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2567 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2568 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2569 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2570 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2571 } 2572 } 2573 PetscFunctionReturn(0); 2574 } 2575 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2579 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2580 { 2581 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2582 PC_IS *pcis = (PC_IS*)pc->data; 2583 PC pc_temp; 2584 Mat A_RR; 2585 MatReuse reuse; 2586 PetscScalar m_one = -1.0; 2587 PetscReal value; 2588 PetscInt n_D,n_R; 2589 PetscBool use_exact,use_exact_reduced,issbaij; 2590 PetscErrorCode ierr; 2591 /* prefixes stuff */ 2592 char dir_prefix[256],neu_prefix[256],str_level[16]; 2593 size_t len; 2594 2595 PetscFunctionBegin; 2596 2597 /* compute prefixes */ 2598 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2599 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2600 if (!pcbddc->current_level) { 2601 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2602 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2603 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2604 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2605 } else { 2606 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2607 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2608 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2609 len -= 15; /* remove "pc_bddc_coarse_" */ 2610 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2611 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2612 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2613 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2614 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2615 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2616 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2617 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2618 } 2619 2620 /* DIRICHLET PROBLEM */ 2621 if (dirichlet) { 2622 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2623 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2624 Mat A_IIn; 2625 2626 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2627 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2628 pcis->A_II = A_IIn; 2629 } 2630 if (pcbddc->local_mat->symmetric_set) { 2631 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2632 } 2633 /* Matrix for Dirichlet problem is pcis->A_II */ 2634 n_D = pcis->n - pcis->n_B; 2635 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2636 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2637 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2638 /* default */ 2639 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2640 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2641 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2642 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2643 if (issbaij) { 2644 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2645 } else { 2646 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2647 } 2648 /* Allow user's customization */ 2649 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2650 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2651 } 2652 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2653 if (sub_schurs->reuse_mumps) { 2654 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2655 2656 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2657 } 2658 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2659 if (!n_D) { 2660 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2661 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2662 } 2663 /* Set Up KSP for Dirichlet problem of BDDC */ 2664 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2665 /* set ksp_D into pcis data */ 2666 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2667 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2668 pcis->ksp_D = pcbddc->ksp_D; 2669 } 2670 2671 /* NEUMANN PROBLEM */ 2672 A_RR = 0; 2673 if (neumann) { 2674 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2675 PetscInt ibs,mbs; 2676 PetscBool issbaij; 2677 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2678 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2679 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2680 if (pcbddc->ksp_R) { /* already created ksp */ 2681 PetscInt nn_R; 2682 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2683 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2684 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2685 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2686 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2687 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2688 reuse = MAT_INITIAL_MATRIX; 2689 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2690 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2691 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2692 reuse = MAT_INITIAL_MATRIX; 2693 } else { /* safe to reuse the matrix */ 2694 reuse = MAT_REUSE_MATRIX; 2695 } 2696 } 2697 /* last check */ 2698 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2699 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2700 reuse = MAT_INITIAL_MATRIX; 2701 } 2702 } else { /* first time, so we need to create the matrix */ 2703 reuse = MAT_INITIAL_MATRIX; 2704 } 2705 /* extract A_RR */ 2706 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2707 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2708 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2709 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2710 if (matis->A == pcbddc->local_mat) { 2711 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2712 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2713 } else { 2714 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2715 } 2716 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2717 if (matis->A == pcbddc->local_mat) { 2718 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2719 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2720 } else { 2721 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2722 } 2723 } 2724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2725 if (pcbddc->local_mat->symmetric_set) { 2726 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2727 } 2728 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2729 Mat A_RRn; 2730 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2731 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2732 A_RR = A_RRn; 2733 } 2734 if (!pcbddc->ksp_R) { /* create object if not present */ 2735 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2736 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2737 /* default */ 2738 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2739 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2740 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2741 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2742 if (issbaij) { 2743 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2744 } else { 2745 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2746 } 2747 /* Allow user's customization */ 2748 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2749 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2750 } 2751 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2752 if (!n_R) { 2753 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2754 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2755 } 2756 /* Reuse MUMPS solver if it is present */ 2757 if (sub_schurs->reuse_mumps) { 2758 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2759 2760 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2761 } 2762 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2763 /* Set Up KSP for Neumann problem of BDDC */ 2764 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2765 } 2766 /* free Neumann problem's matrix */ 2767 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2768 2769 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2770 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2771 if (pcbddc->dbg_flag) { 2772 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2773 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2774 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2775 } 2776 if (dirichlet) { /* Dirichlet */ 2777 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2778 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2779 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2780 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2781 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2782 /* need to be adapted? */ 2783 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2784 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2785 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2786 /* print info */ 2787 if (pcbddc->dbg_flag) { 2788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2790 } 2791 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2792 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2793 } 2794 } 2795 if (neumann) { /* Neumann */ 2796 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2797 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2798 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2799 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2800 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2801 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2802 /* need to be adapted? */ 2803 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2804 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2805 /* print info */ 2806 if (pcbddc->dbg_flag) { 2807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2809 } 2810 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2811 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2812 } 2813 } 2814 } 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2820 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2821 { 2822 PetscErrorCode ierr; 2823 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2824 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2825 2826 PetscFunctionBegin; 2827 if (!sub_schurs->reuse_mumps) { 2828 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2829 } 2830 if (!pcbddc->switch_static) { 2831 if (applytranspose && pcbddc->local_auxmat1) { 2832 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2833 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2834 } 2835 if (!sub_schurs->reuse_mumps) { 2836 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2837 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2838 } else { 2839 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2840 2841 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 } 2844 } else { 2845 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2846 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2847 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2848 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2849 if (applytranspose && pcbddc->local_auxmat1) { 2850 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2851 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2852 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2853 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2854 } 2855 } 2856 if (!sub_schurs->reuse_mumps) { 2857 if (applytranspose) { 2858 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2859 } else { 2860 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2861 } 2862 } else { 2863 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2864 2865 if (applytranspose) { 2866 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2867 } else { 2868 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2869 } 2870 } 2871 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2872 if (!pcbddc->switch_static) { 2873 if (!sub_schurs->reuse_mumps) { 2874 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2875 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2876 } else { 2877 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2878 2879 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2881 } 2882 if (!applytranspose && pcbddc->local_auxmat1) { 2883 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2884 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2885 } 2886 } else { 2887 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2888 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2889 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 if (!applytranspose && pcbddc->local_auxmat1) { 2892 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2893 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2894 } 2895 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2896 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2897 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2898 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2904 #undef __FUNCT__ 2905 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2906 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2907 { 2908 PetscErrorCode ierr; 2909 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2910 PC_IS* pcis = (PC_IS*) (pc->data); 2911 const PetscScalar zero = 0.0; 2912 2913 PetscFunctionBegin; 2914 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2915 if (applytranspose) { 2916 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2917 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2918 } else { 2919 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2920 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2921 } 2922 2923 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2924 if (pcbddc->benign_n) { 2925 PetscScalar *array; 2926 PetscInt j; 2927 2928 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2929 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2930 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2931 } 2932 2933 /* start communications from local primal nodes to rhs of coarse solver */ 2934 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2935 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2936 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2937 2938 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2939 /* TODO remove null space when doing multilevel */ 2940 if (pcbddc->coarse_ksp) { 2941 Mat coarse_mat; 2942 Vec rhs,sol; 2943 MatNullSpace nullsp; 2944 2945 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2946 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2947 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2948 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2949 if (nullsp) { 2950 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2951 } 2952 if (applytranspose) { 2953 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2954 } else { 2955 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2956 } 2957 if (nullsp) { 2958 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2959 } 2960 } 2961 2962 /* Local solution on R nodes */ 2963 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2964 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2965 } 2966 2967 /* communications from coarse sol to local primal nodes */ 2968 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2969 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2970 2971 /* Sum contributions from two levels */ 2972 if (applytranspose) { 2973 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2974 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2975 } else { 2976 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2977 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2978 } 2979 /* store p0 */ 2980 if (pcbddc->benign_n) { 2981 PetscScalar *array; 2982 PetscInt j; 2983 2984 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2985 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2986 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2987 } 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2993 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2994 { 2995 PetscErrorCode ierr; 2996 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2997 PetscScalar *array; 2998 Vec from,to; 2999 3000 PetscFunctionBegin; 3001 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3002 from = pcbddc->coarse_vec; 3003 to = pcbddc->vec1_P; 3004 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3005 Vec tvec; 3006 3007 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3008 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3009 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3010 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3011 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3012 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3013 } 3014 } else { /* from local to global -> put data in coarse right hand side */ 3015 from = pcbddc->vec1_P; 3016 to = pcbddc->coarse_vec; 3017 } 3018 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 #undef __FUNCT__ 3023 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3024 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3025 { 3026 PetscErrorCode ierr; 3027 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3028 PetscScalar *array; 3029 Vec from,to; 3030 3031 PetscFunctionBegin; 3032 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3033 from = pcbddc->coarse_vec; 3034 to = pcbddc->vec1_P; 3035 } else { /* from local to global -> put data in coarse right hand side */ 3036 from = pcbddc->vec1_P; 3037 to = pcbddc->coarse_vec; 3038 } 3039 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3040 if (smode == SCATTER_FORWARD) { 3041 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3042 Vec tvec; 3043 3044 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3045 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3046 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3048 } 3049 } else { 3050 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3051 ierr = VecResetArray(from);CHKERRQ(ierr); 3052 } 3053 } 3054 PetscFunctionReturn(0); 3055 } 3056 3057 /* uncomment for testing purposes */ 3058 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3061 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3062 { 3063 PetscErrorCode ierr; 3064 PC_IS* pcis = (PC_IS*)(pc->data); 3065 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3066 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3067 /* one and zero */ 3068 PetscScalar one=1.0,zero=0.0; 3069 /* space to store constraints and their local indices */ 3070 PetscScalar *constraints_data; 3071 PetscInt *constraints_idxs,*constraints_idxs_B; 3072 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3073 PetscInt *constraints_n; 3074 /* iterators */ 3075 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3076 /* BLAS integers */ 3077 PetscBLASInt lwork,lierr; 3078 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3079 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3080 /* reuse */ 3081 PetscInt olocal_primal_size,olocal_primal_size_cc; 3082 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3083 /* change of basis */ 3084 PetscBool qr_needed; 3085 PetscBT change_basis,qr_needed_idx; 3086 /* auxiliary stuff */ 3087 PetscInt *nnz,*is_indices; 3088 PetscInt ncc; 3089 /* some quantities */ 3090 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3091 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3092 3093 PetscFunctionBegin; 3094 /* Destroy Mat objects computed previously */ 3095 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3096 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3097 /* save info on constraints from previous setup (if any) */ 3098 olocal_primal_size = pcbddc->local_primal_size; 3099 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3100 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3101 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3102 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3103 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3104 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3105 3106 /* print some info */ 3107 if (pcbddc->dbg_flag) { 3108 IS vertices; 3109 PetscInt nv,nedges,nfaces; 3110 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3111 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3112 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3113 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3114 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3116 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3117 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3119 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3120 } 3121 3122 if (!pcbddc->adaptive_selection) { 3123 IS ISForVertices,*ISForFaces,*ISForEdges; 3124 MatNullSpace nearnullsp; 3125 const Vec *nearnullvecs; 3126 Vec *localnearnullsp; 3127 PetscScalar *array; 3128 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3129 PetscBool nnsp_has_cnst; 3130 /* LAPACK working arrays for SVD or POD */ 3131 PetscBool skip_lapack,boolforchange; 3132 PetscScalar *work; 3133 PetscReal *singular_vals; 3134 #if defined(PETSC_USE_COMPLEX) 3135 PetscReal *rwork; 3136 #endif 3137 #if defined(PETSC_MISSING_LAPACK_GESVD) 3138 PetscScalar *temp_basis,*correlation_mat; 3139 #else 3140 PetscBLASInt dummy_int=1; 3141 PetscScalar dummy_scalar=1.; 3142 #endif 3143 3144 /* Get index sets for faces, edges and vertices from graph */ 3145 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3146 /* free unneeded index sets */ 3147 if (!pcbddc->use_vertices) { 3148 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3149 } 3150 if (!pcbddc->use_edges) { 3151 for (i=0;i<n_ISForEdges;i++) { 3152 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3153 } 3154 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3155 n_ISForEdges = 0; 3156 } 3157 if (!pcbddc->use_faces) { 3158 for (i=0;i<n_ISForFaces;i++) { 3159 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3160 } 3161 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3162 n_ISForFaces = 0; 3163 } 3164 3165 #if defined(PETSC_USE_DEBUG) 3166 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3167 Also use_change_of_basis should be consistent among processors */ 3168 if (pcbddc->NullSpace) { 3169 PetscBool tbool[2],gbool[2]; 3170 3171 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3172 pcbddc->use_change_of_basis = PETSC_TRUE; 3173 if (!ISForEdges) { 3174 pcbddc->use_change_on_faces = PETSC_TRUE; 3175 } 3176 } 3177 tbool[0] = pcbddc->use_change_of_basis; 3178 tbool[1] = pcbddc->use_change_on_faces; 3179 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3180 pcbddc->use_change_of_basis = gbool[0]; 3181 pcbddc->use_change_on_faces = gbool[1]; 3182 } 3183 #endif 3184 3185 /* check if near null space is attached to global mat */ 3186 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3187 if (nearnullsp) { 3188 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3189 /* remove any stored info */ 3190 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3191 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3192 /* store information for BDDC solver reuse */ 3193 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3194 pcbddc->onearnullspace = nearnullsp; 3195 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3196 for (i=0;i<nnsp_size;i++) { 3197 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3198 } 3199 } else { /* if near null space is not provided BDDC uses constants by default */ 3200 nnsp_size = 0; 3201 nnsp_has_cnst = PETSC_TRUE; 3202 } 3203 /* get max number of constraints on a single cc */ 3204 max_constraints = nnsp_size; 3205 if (nnsp_has_cnst) max_constraints++; 3206 3207 /* 3208 Evaluate maximum storage size needed by the procedure 3209 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3210 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3211 There can be multiple constraints per connected component 3212 */ 3213 n_vertices = 0; 3214 if (ISForVertices) { 3215 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3216 } 3217 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3218 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3219 3220 total_counts = n_ISForFaces+n_ISForEdges; 3221 total_counts *= max_constraints; 3222 total_counts += n_vertices; 3223 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3224 3225 total_counts = 0; 3226 max_size_of_constraint = 0; 3227 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3228 IS used_is; 3229 if (i<n_ISForEdges) { 3230 used_is = ISForEdges[i]; 3231 } else { 3232 used_is = ISForFaces[i-n_ISForEdges]; 3233 } 3234 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3235 total_counts += j; 3236 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3237 } 3238 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3239 3240 /* get local part of global near null space vectors */ 3241 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3242 for (k=0;k<nnsp_size;k++) { 3243 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3244 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3246 } 3247 3248 /* whether or not to skip lapack calls */ 3249 skip_lapack = PETSC_TRUE; 3250 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3251 3252 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3253 if (!skip_lapack) { 3254 PetscScalar temp_work; 3255 3256 #if defined(PETSC_MISSING_LAPACK_GESVD) 3257 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3258 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3259 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3260 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3261 #if defined(PETSC_USE_COMPLEX) 3262 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3263 #endif 3264 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3265 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3266 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3267 lwork = -1; 3268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3269 #if !defined(PETSC_USE_COMPLEX) 3270 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3271 #else 3272 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3273 #endif 3274 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3275 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3276 #else /* on missing GESVD */ 3277 /* SVD */ 3278 PetscInt max_n,min_n; 3279 max_n = max_size_of_constraint; 3280 min_n = max_constraints; 3281 if (max_size_of_constraint < max_constraints) { 3282 min_n = max_size_of_constraint; 3283 max_n = max_constraints; 3284 } 3285 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3286 #if defined(PETSC_USE_COMPLEX) 3287 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3288 #endif 3289 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3290 lwork = -1; 3291 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3292 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3293 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3294 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3295 #if !defined(PETSC_USE_COMPLEX) 3296 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3297 #else 3298 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3299 #endif 3300 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3301 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3302 #endif /* on missing GESVD */ 3303 /* Allocate optimal workspace */ 3304 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3305 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3306 } 3307 /* Now we can loop on constraining sets */ 3308 total_counts = 0; 3309 constraints_idxs_ptr[0] = 0; 3310 constraints_data_ptr[0] = 0; 3311 /* vertices */ 3312 if (n_vertices) { 3313 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3314 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3315 for (i=0;i<n_vertices;i++) { 3316 constraints_n[total_counts] = 1; 3317 constraints_data[total_counts] = 1.0; 3318 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3319 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3320 total_counts++; 3321 } 3322 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3323 n_vertices = total_counts; 3324 } 3325 3326 /* edges and faces */ 3327 total_counts_cc = total_counts; 3328 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3329 IS used_is; 3330 PetscBool idxs_copied = PETSC_FALSE; 3331 3332 if (ncc<n_ISForEdges) { 3333 used_is = ISForEdges[ncc]; 3334 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3335 } else { 3336 used_is = ISForFaces[ncc-n_ISForEdges]; 3337 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3338 } 3339 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3340 3341 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3342 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3343 /* change of basis should not be performed on local periodic nodes */ 3344 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3345 if (nnsp_has_cnst) { 3346 PetscScalar quad_value; 3347 3348 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3349 idxs_copied = PETSC_TRUE; 3350 3351 if (!pcbddc->use_nnsp_true) { 3352 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3353 } else { 3354 quad_value = 1.0; 3355 } 3356 for (j=0;j<size_of_constraint;j++) { 3357 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3358 } 3359 temp_constraints++; 3360 total_counts++; 3361 } 3362 for (k=0;k<nnsp_size;k++) { 3363 PetscReal real_value; 3364 PetscScalar *ptr_to_data; 3365 3366 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3367 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3368 for (j=0;j<size_of_constraint;j++) { 3369 ptr_to_data[j] = array[is_indices[j]]; 3370 } 3371 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3372 /* check if array is null on the connected component */ 3373 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3374 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3375 if (real_value > 0.0) { /* keep indices and values */ 3376 temp_constraints++; 3377 total_counts++; 3378 if (!idxs_copied) { 3379 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3380 idxs_copied = PETSC_TRUE; 3381 } 3382 } 3383 } 3384 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3385 valid_constraints = temp_constraints; 3386 if (!pcbddc->use_nnsp_true && temp_constraints) { 3387 if (temp_constraints == 1) { /* just normalize the constraint */ 3388 PetscScalar norm,*ptr_to_data; 3389 3390 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3391 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3392 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3393 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3394 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3395 } else { /* perform SVD */ 3396 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3397 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3398 3399 #if defined(PETSC_MISSING_LAPACK_GESVD) 3400 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3401 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3402 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3403 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3404 from that computed using LAPACKgesvd 3405 -> This is due to a different computation of eigenvectors in LAPACKheev 3406 -> The quality of the POD-computed basis will be the same */ 3407 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3408 /* Store upper triangular part of correlation matrix */ 3409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3410 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3411 for (j=0;j<temp_constraints;j++) { 3412 for (k=0;k<j+1;k++) { 3413 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3414 } 3415 } 3416 /* compute eigenvalues and eigenvectors of correlation matrix */ 3417 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3418 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3419 #if !defined(PETSC_USE_COMPLEX) 3420 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3421 #else 3422 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3423 #endif 3424 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3425 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3426 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3427 j = 0; 3428 while (j < temp_constraints && singular_vals[j] < tol) j++; 3429 total_counts = total_counts-j; 3430 valid_constraints = temp_constraints-j; 3431 /* scale and copy POD basis into used quadrature memory */ 3432 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3433 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3434 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3435 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3436 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3438 if (j<temp_constraints) { 3439 PetscInt ii; 3440 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3441 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3442 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3443 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3444 for (k=0;k<temp_constraints-j;k++) { 3445 for (ii=0;ii<size_of_constraint;ii++) { 3446 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3447 } 3448 } 3449 } 3450 #else /* on missing GESVD */ 3451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3452 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3454 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3455 #if !defined(PETSC_USE_COMPLEX) 3456 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3457 #else 3458 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3459 #endif 3460 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3461 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3462 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3463 k = temp_constraints; 3464 if (k > size_of_constraint) k = size_of_constraint; 3465 j = 0; 3466 while (j < k && singular_vals[k-j-1] < tol) j++; 3467 valid_constraints = k-j; 3468 total_counts = total_counts-temp_constraints+valid_constraints; 3469 #endif /* on missing GESVD */ 3470 } 3471 } 3472 /* update pointers information */ 3473 if (valid_constraints) { 3474 constraints_n[total_counts_cc] = valid_constraints; 3475 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3476 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3477 /* set change_of_basis flag */ 3478 if (boolforchange) { 3479 PetscBTSet(change_basis,total_counts_cc); 3480 } 3481 total_counts_cc++; 3482 } 3483 } 3484 /* free workspace */ 3485 if (!skip_lapack) { 3486 ierr = PetscFree(work);CHKERRQ(ierr); 3487 #if defined(PETSC_USE_COMPLEX) 3488 ierr = PetscFree(rwork);CHKERRQ(ierr); 3489 #endif 3490 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3491 #if defined(PETSC_MISSING_LAPACK_GESVD) 3492 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3493 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3494 #endif 3495 } 3496 for (k=0;k<nnsp_size;k++) { 3497 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3498 } 3499 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3500 /* free index sets of faces, edges and vertices */ 3501 for (i=0;i<n_ISForFaces;i++) { 3502 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3503 } 3504 if (n_ISForFaces) { 3505 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3506 } 3507 for (i=0;i<n_ISForEdges;i++) { 3508 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3509 } 3510 if (n_ISForEdges) { 3511 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3512 } 3513 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3514 } else { 3515 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3516 3517 total_counts = 0; 3518 n_vertices = 0; 3519 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3520 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3521 } 3522 max_constraints = 0; 3523 total_counts_cc = 0; 3524 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3525 total_counts += pcbddc->adaptive_constraints_n[i]; 3526 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3527 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3528 } 3529 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3530 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3531 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3532 constraints_data = pcbddc->adaptive_constraints_data; 3533 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3534 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3535 total_counts_cc = 0; 3536 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3537 if (pcbddc->adaptive_constraints_n[i]) { 3538 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3539 } 3540 } 3541 #if 0 3542 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3543 for (i=0;i<total_counts_cc;i++) { 3544 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3545 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3546 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3547 printf(" %d",constraints_idxs[j]); 3548 } 3549 printf("\n"); 3550 printf("number of cc: %d\n",constraints_n[i]); 3551 } 3552 for (i=0;i<n_vertices;i++) { 3553 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3554 } 3555 for (i=0;i<sub_schurs->n_subs;i++) { 3556 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3557 } 3558 #endif 3559 3560 max_size_of_constraint = 0; 3561 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3562 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3563 /* Change of basis */ 3564 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3565 if (pcbddc->use_change_of_basis) { 3566 for (i=0;i<sub_schurs->n_subs;i++) { 3567 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3568 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3569 } 3570 } 3571 } 3572 } 3573 pcbddc->local_primal_size = total_counts; 3574 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3575 3576 /* map constraints_idxs in boundary numbering */ 3577 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3578 if (i != constraints_idxs_ptr[total_counts_cc]) { 3579 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3580 } 3581 3582 /* Create constraint matrix */ 3583 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3584 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3585 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3586 3587 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3588 /* determine if a QR strategy is needed for change of basis */ 3589 qr_needed = PETSC_FALSE; 3590 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3591 total_primal_vertices=0; 3592 pcbddc->local_primal_size_cc = 0; 3593 for (i=0;i<total_counts_cc;i++) { 3594 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3595 if (size_of_constraint == 1) { 3596 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3597 pcbddc->local_primal_size_cc += 1; 3598 } else if (PetscBTLookup(change_basis,i)) { 3599 for (k=0;k<constraints_n[i];k++) { 3600 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3601 } 3602 pcbddc->local_primal_size_cc += constraints_n[i]; 3603 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3604 PetscBTSet(qr_needed_idx,i); 3605 qr_needed = PETSC_TRUE; 3606 } 3607 } else { 3608 pcbddc->local_primal_size_cc += 1; 3609 } 3610 } 3611 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3612 pcbddc->n_vertices = total_primal_vertices; 3613 /* permute indices in order to have a sorted set of vertices */ 3614 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3615 3616 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3617 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3618 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3619 3620 /* nonzero structure of constraint matrix */ 3621 /* and get reference dof for local constraints */ 3622 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3623 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3624 3625 j = total_primal_vertices; 3626 total_counts = total_primal_vertices; 3627 cum = total_primal_vertices; 3628 for (i=n_vertices;i<total_counts_cc;i++) { 3629 if (!PetscBTLookup(change_basis,i)) { 3630 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3631 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3632 cum++; 3633 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3634 for (k=0;k<constraints_n[i];k++) { 3635 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3636 nnz[j+k] = size_of_constraint; 3637 } 3638 j += constraints_n[i]; 3639 } 3640 } 3641 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3642 ierr = PetscFree(nnz);CHKERRQ(ierr); 3643 3644 /* set values in constraint matrix */ 3645 for (i=0;i<total_primal_vertices;i++) { 3646 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3647 } 3648 total_counts = total_primal_vertices; 3649 for (i=n_vertices;i<total_counts_cc;i++) { 3650 if (!PetscBTLookup(change_basis,i)) { 3651 PetscInt *cols; 3652 3653 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3654 cols = constraints_idxs+constraints_idxs_ptr[i]; 3655 for (k=0;k<constraints_n[i];k++) { 3656 PetscInt row = total_counts+k; 3657 PetscScalar *vals; 3658 3659 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3660 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3661 } 3662 total_counts += constraints_n[i]; 3663 } 3664 } 3665 /* assembling */ 3666 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3667 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3668 3669 /* 3670 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3671 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3672 */ 3673 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3674 if (pcbddc->use_change_of_basis) { 3675 /* dual and primal dofs on a single cc */ 3676 PetscInt dual_dofs,primal_dofs; 3677 /* working stuff for GEQRF */ 3678 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3679 PetscBLASInt lqr_work; 3680 /* working stuff for UNGQR */ 3681 PetscScalar *gqr_work,lgqr_work_t; 3682 PetscBLASInt lgqr_work; 3683 /* working stuff for TRTRS */ 3684 PetscScalar *trs_rhs; 3685 PetscBLASInt Blas_NRHS; 3686 /* pointers for values insertion into change of basis matrix */ 3687 PetscInt *start_rows,*start_cols; 3688 PetscScalar *start_vals; 3689 /* working stuff for values insertion */ 3690 PetscBT is_primal; 3691 PetscInt *aux_primal_numbering_B; 3692 /* matrix sizes */ 3693 PetscInt global_size,local_size; 3694 /* temporary change of basis */ 3695 Mat localChangeOfBasisMatrix; 3696 /* extra space for debugging */ 3697 PetscScalar *dbg_work; 3698 3699 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3700 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3701 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3702 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3703 /* nonzeros for local mat */ 3704 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3705 for (i=0;i<pcis->n;i++) nnz[i]=1; 3706 for (i=n_vertices;i<total_counts_cc;i++) { 3707 if (PetscBTLookup(change_basis,i)) { 3708 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3709 if (PetscBTLookup(qr_needed_idx,i)) { 3710 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3711 } else { 3712 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3713 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3714 } 3715 } 3716 } 3717 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3718 ierr = PetscFree(nnz);CHKERRQ(ierr); 3719 /* Set initial identity in the matrix */ 3720 for (i=0;i<pcis->n;i++) { 3721 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3722 } 3723 3724 if (pcbddc->dbg_flag) { 3725 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3727 } 3728 3729 3730 /* Now we loop on the constraints which need a change of basis */ 3731 /* 3732 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3733 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3734 3735 Basic blocks of change of basis matrix T computed by 3736 3737 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3738 3739 | 1 0 ... 0 s_1/S | 3740 | 0 1 ... 0 s_2/S | 3741 | ... | 3742 | 0 ... 1 s_{n-1}/S | 3743 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3744 3745 with S = \sum_{i=1}^n s_i^2 3746 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3747 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3748 3749 - QR decomposition of constraints otherwise 3750 */ 3751 if (qr_needed) { 3752 /* space to store Q */ 3753 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3754 /* first we issue queries for optimal work */ 3755 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3756 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3757 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3758 lqr_work = -1; 3759 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3760 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3761 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3762 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3763 lgqr_work = -1; 3764 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3765 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3766 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3767 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3768 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3769 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3770 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3771 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3772 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3773 /* array to store scaling factors for reflectors */ 3774 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3775 /* array to store rhs and solution of triangular solver */ 3776 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3777 /* allocating workspace for check */ 3778 if (pcbddc->dbg_flag) { 3779 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3780 } 3781 } 3782 /* array to store whether a node is primal or not */ 3783 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3784 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3785 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3786 if (i != total_primal_vertices) { 3787 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3788 } 3789 for (i=0;i<total_primal_vertices;i++) { 3790 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3791 } 3792 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3793 3794 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3795 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3796 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3797 if (PetscBTLookup(change_basis,total_counts)) { 3798 /* get constraint info */ 3799 primal_dofs = constraints_n[total_counts]; 3800 dual_dofs = size_of_constraint-primal_dofs; 3801 3802 if (pcbddc->dbg_flag) { 3803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3804 } 3805 3806 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3807 3808 /* copy quadrature constraints for change of basis check */ 3809 if (pcbddc->dbg_flag) { 3810 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3811 } 3812 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3813 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3814 3815 /* compute QR decomposition of constraints */ 3816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3817 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3819 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3820 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3821 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3822 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3823 3824 /* explictly compute R^-T */ 3825 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3826 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3827 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3828 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3830 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3831 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3832 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3833 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3835 3836 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3837 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3838 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3839 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3841 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3842 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3843 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3844 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3845 3846 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3847 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3848 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3850 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3851 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3853 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3856 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3858 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3859 3860 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3861 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3862 /* insert cols for primal dofs */ 3863 for (j=0;j<primal_dofs;j++) { 3864 start_vals = &qr_basis[j*size_of_constraint]; 3865 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3866 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3867 } 3868 /* insert cols for dual dofs */ 3869 for (j=0,k=0;j<dual_dofs;k++) { 3870 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3871 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3872 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3873 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3874 j++; 3875 } 3876 } 3877 3878 /* check change of basis */ 3879 if (pcbddc->dbg_flag) { 3880 PetscInt ii,jj; 3881 PetscBool valid_qr=PETSC_TRUE; 3882 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3883 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3885 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3886 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3887 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3888 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3889 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3890 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3891 for (jj=0;jj<size_of_constraint;jj++) { 3892 for (ii=0;ii<primal_dofs;ii++) { 3893 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3894 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3895 } 3896 } 3897 if (!valid_qr) { 3898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3899 for (jj=0;jj<size_of_constraint;jj++) { 3900 for (ii=0;ii<primal_dofs;ii++) { 3901 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3902 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3903 } 3904 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3905 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3906 } 3907 } 3908 } 3909 } else { 3910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3911 } 3912 } 3913 } else { /* simple transformation block */ 3914 PetscInt row,col; 3915 PetscScalar val,norm; 3916 3917 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3918 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3919 for (j=0;j<size_of_constraint;j++) { 3920 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3921 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3922 if (!PetscBTLookup(is_primal,row_B)) { 3923 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3924 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3925 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3926 } else { 3927 for (k=0;k<size_of_constraint;k++) { 3928 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3929 if (row != col) { 3930 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3931 } else { 3932 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3933 } 3934 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3935 } 3936 } 3937 } 3938 if (pcbddc->dbg_flag) { 3939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3940 } 3941 } 3942 } else { 3943 if (pcbddc->dbg_flag) { 3944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3945 } 3946 } 3947 } 3948 3949 /* free workspace */ 3950 if (qr_needed) { 3951 if (pcbddc->dbg_flag) { 3952 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3953 } 3954 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3955 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3956 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3957 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3958 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3959 } 3960 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3961 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3962 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 3964 /* assembling of global change of variable */ 3965 { 3966 Mat tmat; 3967 PetscInt bs; 3968 3969 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3970 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3971 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3972 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3973 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3974 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3975 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3976 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3977 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3978 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3979 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3980 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3981 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3982 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3983 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3984 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3985 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3986 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3987 } 3988 /* check */ 3989 if (pcbddc->dbg_flag) { 3990 PetscReal error; 3991 Vec x,x_change; 3992 3993 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3994 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3995 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3996 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3997 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3998 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3999 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4000 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4001 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4002 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4003 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4004 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4006 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4007 ierr = VecDestroy(&x);CHKERRQ(ierr); 4008 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4009 } 4010 4011 /* adapt sub_schurs computed (if any) */ 4012 if (pcbddc->use_deluxe_scaling) { 4013 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4014 if (sub_schurs->S_Ej_all) { 4015 Mat S_new,tmat; 4016 ISLocalToGlobalMapping NtoSall; 4017 IS is_all_N,is_V,is_V_Sall; 4018 const PetscScalar *array; 4019 const PetscInt *idxs_V,*idxs_all; 4020 PetscInt i,n_V; 4021 4022 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4023 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4024 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4025 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4026 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4027 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4028 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4029 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4030 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4031 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4032 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4033 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4034 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4035 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4036 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4037 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4038 for (i=0;i<n_V;i++) { 4039 PetscScalar val; 4040 PetscInt idx; 4041 4042 idx = idxs_V[i]; 4043 val = array[idxs_all[idxs_V[i]]]; 4044 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4045 } 4046 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4047 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4048 sub_schurs->S_Ej_all = S_new; 4049 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4050 if (sub_schurs->sum_S_Ej_all) { 4051 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4052 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4053 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4054 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4055 sub_schurs->sum_S_Ej_all = S_new; 4056 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4057 } 4058 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4059 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4060 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4061 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4062 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4063 } 4064 } 4065 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4066 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4067 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4068 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4069 } 4070 4071 /* set up change of basis context */ 4072 if (pcbddc->ChangeOfBasisMatrix) { 4073 PCBDDCChange_ctx change_ctx; 4074 4075 if (!pcbddc->new_global_mat) { 4076 PetscInt global_size,local_size; 4077 4078 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4079 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4080 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4081 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4082 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4083 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4084 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4085 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4086 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4087 } else { 4088 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4089 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4090 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4091 } 4092 if (!pcbddc->user_ChangeOfBasisMatrix) { 4093 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4094 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4095 } else { 4096 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4097 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4098 } 4099 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4100 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4101 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4102 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4103 } else { 4104 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4105 } 4106 4107 /* add pressure dofs to set of primal nodes for numbering purposes */ 4108 for (i=0;i<pcbddc->benign_n;i++) { 4109 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4110 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4111 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4112 pcbddc->local_primal_size_cc++; 4113 pcbddc->local_primal_size++; 4114 } 4115 4116 /* check if a new primal space has been introduced (also take into account benign trick) */ 4117 pcbddc->new_primal_space_local = PETSC_TRUE; 4118 if (olocal_primal_size == pcbddc->local_primal_size) { 4119 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4120 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4121 if (!pcbddc->new_primal_space_local) { 4122 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4123 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4124 } 4125 } 4126 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4127 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4128 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4129 4130 /* flush dbg viewer */ 4131 if (pcbddc->dbg_flag) { 4132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4133 } 4134 4135 /* free workspace */ 4136 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4137 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4138 if (!pcbddc->adaptive_selection) { 4139 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4140 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4141 } else { 4142 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4143 pcbddc->adaptive_constraints_idxs_ptr, 4144 pcbddc->adaptive_constraints_data_ptr, 4145 pcbddc->adaptive_constraints_idxs, 4146 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4147 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4148 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4149 } 4150 PetscFunctionReturn(0); 4151 } 4152 4153 #undef __FUNCT__ 4154 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4155 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4156 { 4157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4158 PC_IS *pcis = (PC_IS*)pc->data; 4159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4160 PetscInt ierr,i,vertex_size,N; 4161 PetscViewer viewer=pcbddc->dbg_viewer; 4162 4163 PetscFunctionBegin; 4164 /* Reset previously computed graph */ 4165 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4166 /* Init local Graph struct */ 4167 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4168 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4169 4170 /* Check validity of the csr graph passed in by the user */ 4171 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4172 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4173 } 4174 4175 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4176 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 4177 PetscInt *xadj,*adjncy; 4178 PetscInt nvtxs; 4179 PetscBool flg_row=PETSC_FALSE; 4180 4181 if (pcbddc->use_local_adj) { 4182 4183 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4184 if (flg_row) { 4185 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4186 pcbddc->computed_rowadj = PETSC_TRUE; 4187 } 4188 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4189 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 4190 IS is_dummy; 4191 ISLocalToGlobalMapping l2gmap_dummy; 4192 PetscInt j,sum; 4193 PetscInt *cxadj,*cadjncy; 4194 const PetscInt *idxs; 4195 PCBDDCGraph graph; 4196 PetscBT is_on_boundary; 4197 4198 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 4199 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 4200 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4201 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4202 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 4203 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 4204 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4205 if (flg_row) { 4206 graph->xadj = xadj; 4207 graph->adjncy = adjncy; 4208 } 4209 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 4210 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4211 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4212 4213 if (pcbddc->dbg_flag) { 4214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 4215 for (i=0;i<graph->ncc;i++) { 4216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 4217 } 4218 } 4219 4220 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 4221 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4222 for (i=0;i<pcis->n_B;i++) { 4223 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 4224 } 4225 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4226 4227 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 4228 sum = 0; 4229 for (i=0;i<graph->ncc;i++) { 4230 PetscInt sizecc = 0; 4231 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4232 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4233 sizecc++; 4234 } 4235 } 4236 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4237 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4238 cxadj[graph->queue[j]] = sizecc; 4239 } 4240 } 4241 sum += sizecc*sizecc; 4242 } 4243 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 4244 sum = 0; 4245 for (i=0;i<pcis->n;i++) { 4246 PetscInt temp = cxadj[i]; 4247 cxadj[i] = sum; 4248 sum += temp; 4249 } 4250 cxadj[pcis->n] = sum; 4251 for (i=0;i<graph->ncc;i++) { 4252 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4253 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4254 PetscInt k,sizecc = 0; 4255 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 4256 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 4257 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 4258 sizecc++; 4259 } 4260 } 4261 } 4262 } 4263 } 4264 if (sum) { 4265 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 4266 } else { 4267 ierr = PetscFree(cxadj);CHKERRQ(ierr); 4268 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 4269 } 4270 graph->xadj = 0; 4271 graph->adjncy = 0; 4272 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4273 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 4274 } 4275 } 4276 if (pcbddc->dbg_flag) { 4277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4278 } 4279 4280 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4281 vertex_size = 1; 4282 if (pcbddc->user_provided_isfordofs) { 4283 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4284 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4285 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4286 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4287 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4288 } 4289 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4290 pcbddc->n_ISForDofs = 0; 4291 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4292 } 4293 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4294 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4295 } else { 4296 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4297 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4298 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4299 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4300 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4301 } 4302 } 4303 } 4304 4305 /* Setup of Graph */ 4306 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4307 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4308 } 4309 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4310 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4311 } 4312 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4313 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4314 } 4315 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4316 4317 /* attach info on disconnected subdomains if present */ 4318 if (pcbddc->n_local_subs) { 4319 PetscInt *local_subs; 4320 4321 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4322 for (i=0;i<pcbddc->n_local_subs;i++) { 4323 const PetscInt *idxs; 4324 PetscInt nl,j; 4325 4326 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4327 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4328 for (j=0;j<nl;j++) { 4329 local_subs[idxs[j]] = i; 4330 } 4331 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4332 } 4333 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4334 pcbddc->mat_graph->local_subs = local_subs; 4335 } 4336 4337 /* Graph's connected components analysis */ 4338 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4339 4340 /* print some info to stdout */ 4341 if (pcbddc->dbg_flag) { 4342 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4343 } 4344 4345 /* mark topography has done */ 4346 pcbddc->recompute_topography = PETSC_FALSE; 4347 PetscFunctionReturn(0); 4348 } 4349 4350 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4351 #undef __FUNCT__ 4352 #define __FUNCT__ "PCBDDCSubsetNumbering" 4353 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4354 { 4355 PetscSF sf; 4356 PetscLayout map; 4357 const PetscInt *idxs; 4358 PetscInt *leaf_data,*root_data,*gidxs; 4359 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4360 PetscInt n_n,nlocals,start,first_index; 4361 PetscMPIInt commsize; 4362 PetscBool first_found; 4363 PetscErrorCode ierr; 4364 4365 PetscFunctionBegin; 4366 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4367 if (subset_mult) { 4368 PetscCheckSameComm(subset,1,subset_mult,2); 4369 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4370 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4371 } 4372 /* create workspace layout for computing global indices of subset */ 4373 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4374 lbounds[0] = lbounds[1] = 0; 4375 for (i=0;i<n;i++) { 4376 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4377 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4378 } 4379 lbounds[0] = -lbounds[0]; 4380 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4381 gbounds[0] = -gbounds[0]; 4382 N = gbounds[1] - gbounds[0] + 1; 4383 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4384 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4385 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4386 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4387 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4388 4389 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4390 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4391 if (subset_mult) { 4392 const PetscInt* idxs_mult; 4393 4394 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4395 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4396 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4397 } else { 4398 for (i=0;i<n;i++) leaf_data[i] = 1; 4399 } 4400 /* local size of new subset */ 4401 n_n = 0; 4402 for (i=0;i<n;i++) n_n += leaf_data[i]; 4403 4404 /* global indexes in layout */ 4405 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4406 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4407 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4408 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4409 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4410 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4411 4412 /* reduce from leaves to roots */ 4413 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4414 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4415 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4416 4417 /* count indexes in local part of layout */ 4418 nlocals = 0; 4419 first_index = -1; 4420 first_found = PETSC_FALSE; 4421 for (i=0;i<Nl;i++) { 4422 if (!first_found && root_data[i]) { 4423 first_found = PETSC_TRUE; 4424 first_index = i; 4425 } 4426 nlocals += root_data[i]; 4427 } 4428 4429 /* cumulative of number of indexes and size of subset without holes */ 4430 #if defined(PETSC_HAVE_MPI_EXSCAN) 4431 start = 0; 4432 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4433 #else 4434 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4435 start = start-nlocals; 4436 #endif 4437 4438 if (N_n) { /* compute total size of new subset if requested */ 4439 *N_n = start + nlocals; 4440 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4441 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4442 } 4443 4444 /* adapt root data with cumulative */ 4445 if (first_found) { 4446 PetscInt old_index; 4447 4448 root_data[first_index] += start; 4449 old_index = first_index; 4450 for (i=first_index+1;i<Nl;i++) { 4451 if (root_data[i]) { 4452 root_data[i] += root_data[old_index]; 4453 old_index = i; 4454 } 4455 } 4456 } 4457 4458 /* from roots to leaves */ 4459 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4460 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4461 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4462 4463 /* create new IS with global indexes without holes */ 4464 if (subset_mult) { 4465 const PetscInt* idxs_mult; 4466 PetscInt cum; 4467 4468 cum = 0; 4469 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4470 for (i=0;i<n;i++) { 4471 PetscInt j; 4472 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4473 } 4474 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4475 } else { 4476 for (i=0;i<n;i++) { 4477 gidxs[i] = leaf_data[i]-1; 4478 } 4479 } 4480 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4481 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 #undef __FUNCT__ 4486 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4487 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4488 { 4489 PetscInt i,j; 4490 PetscScalar *alphas; 4491 PetscErrorCode ierr; 4492 4493 PetscFunctionBegin; 4494 /* this implements stabilized Gram-Schmidt */ 4495 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4496 for (i=0;i<n;i++) { 4497 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4498 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4499 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4500 } 4501 ierr = PetscFree(alphas);CHKERRQ(ierr); 4502 PetscFunctionReturn(0); 4503 } 4504 4505 #undef __FUNCT__ 4506 #define __FUNCT__ "MatISGetSubassemblingPattern" 4507 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4508 { 4509 IS ranks_send_to; 4510 PetscInt n_neighs,*neighs,*n_shared,**shared; 4511 PetscMPIInt size,rank,color; 4512 PetscInt *xadj,*adjncy; 4513 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4514 PetscInt i,local_size,threshold=0; 4515 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4516 PetscSubcomm subcomm; 4517 PetscErrorCode ierr; 4518 4519 PetscFunctionBegin; 4520 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4521 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4522 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4523 4524 /* Get info on mapping */ 4525 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4526 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4527 4528 /* build local CSR graph of subdomains' connectivity */ 4529 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4530 xadj[0] = 0; 4531 xadj[1] = PetscMax(n_neighs-1,0); 4532 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4533 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4534 4535 if (threshold) { 4536 PetscInt xadj_count = 0; 4537 for (i=1;i<n_neighs;i++) { 4538 if (n_shared[i] > threshold) { 4539 adjncy[xadj_count] = neighs[i]; 4540 adjncy_wgt[xadj_count] = n_shared[i]; 4541 xadj_count++; 4542 } 4543 } 4544 xadj[1] = xadj_count; 4545 } else { 4546 if (xadj[1]) { 4547 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4548 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4549 } 4550 } 4551 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4552 if (use_square) { 4553 for (i=0;i<xadj[1];i++) { 4554 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4555 } 4556 } 4557 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4558 4559 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4560 4561 /* 4562 Restrict work on active processes only. 4563 */ 4564 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4565 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4567 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4568 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4569 if (color) { 4570 ierr = PetscFree(xadj);CHKERRQ(ierr); 4571 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4572 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4573 } else { 4574 Mat subdomain_adj; 4575 IS new_ranks,new_ranks_contig; 4576 MatPartitioning partitioner; 4577 PetscInt prank,rstart=0,rend=0; 4578 PetscInt *is_indices,*oldranks; 4579 PetscBool aggregate; 4580 4581 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4582 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4583 prank = rank; 4584 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4585 /* 4586 for (i=0;i<size;i++) { 4587 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4588 } 4589 */ 4590 for (i=0;i<xadj[1];i++) { 4591 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4592 } 4593 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4594 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4595 if (aggregate) { 4596 PetscInt lrows,row,ncols,*cols; 4597 PetscMPIInt nrank; 4598 PetscScalar *vals; 4599 4600 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4601 lrows = 0; 4602 if (nrank<redprocs) { 4603 lrows = size/redprocs; 4604 if (nrank<size%redprocs) lrows++; 4605 } 4606 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4607 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4608 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4609 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4610 row = nrank; 4611 ncols = xadj[1]-xadj[0]; 4612 cols = adjncy; 4613 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4614 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4615 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4616 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 ierr = PetscFree(xadj);CHKERRQ(ierr); 4619 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4620 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4621 ierr = PetscFree(vals);CHKERRQ(ierr); 4622 } else { 4623 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4624 } 4625 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4626 4627 /* Partition */ 4628 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4629 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4630 if (use_vwgt) { 4631 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4632 v_wgt[0] = local_size; 4633 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4634 } 4635 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4636 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4637 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4638 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4639 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4640 4641 /* renumber new_ranks to avoid "holes" in new set of processors */ 4642 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4643 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4644 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4645 if (!redprocs) { 4646 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4647 } else { 4648 PetscInt idxs[1]; 4649 PetscMPIInt tag; 4650 MPI_Request *reqs; 4651 4652 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4653 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4654 for (i=rstart;i<rend;i++) { 4655 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4656 } 4657 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4658 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4659 ierr = PetscFree(reqs);CHKERRQ(ierr); 4660 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4661 } 4662 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4663 /* clean up */ 4664 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4665 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4666 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4667 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4668 } 4669 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4670 4671 /* assemble parallel IS for sends */ 4672 i = 1; 4673 if (color) i=0; 4674 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4675 /* get back IS */ 4676 *is_sends = ranks_send_to; 4677 PetscFunctionReturn(0); 4678 } 4679 4680 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4681 4682 #undef __FUNCT__ 4683 #define __FUNCT__ "MatISSubassemble" 4684 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4685 { 4686 Mat local_mat; 4687 IS is_sends_internal; 4688 PetscInt rows,cols,new_local_rows; 4689 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4690 PetscBool ismatis,isdense,newisdense,destroy_mat; 4691 ISLocalToGlobalMapping l2gmap; 4692 PetscInt* l2gmap_indices; 4693 const PetscInt* is_indices; 4694 MatType new_local_type; 4695 /* buffers */ 4696 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4697 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4698 PetscInt *recv_buffer_idxs_local; 4699 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4700 /* MPI */ 4701 MPI_Comm comm,comm_n; 4702 PetscSubcomm subcomm; 4703 PetscMPIInt n_sends,n_recvs,commsize; 4704 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4705 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4706 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4707 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4708 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4709 PetscErrorCode ierr; 4710 4711 PetscFunctionBegin; 4712 /* TODO: add missing checks */ 4713 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4714 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4715 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4716 PetscValidLogicalCollectiveInt(mat,nis,7); 4717 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4718 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4719 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4720 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4721 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4722 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4723 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4724 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4725 PetscInt mrows,mcols,mnrows,mncols; 4726 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4727 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4728 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4729 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4730 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4731 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4732 } 4733 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4734 PetscValidLogicalCollectiveInt(mat,bs,0); 4735 /* prepare IS for sending if not provided */ 4736 if (!is_sends) { 4737 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4738 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4739 } else { 4740 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4741 is_sends_internal = is_sends; 4742 } 4743 4744 /* get comm */ 4745 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4746 4747 /* compute number of sends */ 4748 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4749 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4750 4751 /* compute number of receives */ 4752 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4753 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4754 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4755 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4756 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4757 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4758 ierr = PetscFree(iflags);CHKERRQ(ierr); 4759 4760 /* restrict comm if requested */ 4761 subcomm = 0; 4762 destroy_mat = PETSC_FALSE; 4763 if (restrict_comm) { 4764 PetscMPIInt color,subcommsize; 4765 4766 color = 0; 4767 if (restrict_full) { 4768 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4769 } else { 4770 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4771 } 4772 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4773 subcommsize = commsize - subcommsize; 4774 /* check if reuse has been requested */ 4775 if (reuse == MAT_REUSE_MATRIX) { 4776 if (*mat_n) { 4777 PetscMPIInt subcommsize2; 4778 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4779 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4780 comm_n = PetscObjectComm((PetscObject)*mat_n); 4781 } else { 4782 comm_n = PETSC_COMM_SELF; 4783 } 4784 } else { /* MAT_INITIAL_MATRIX */ 4785 PetscMPIInt rank; 4786 4787 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4788 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4789 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4790 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4791 comm_n = PetscSubcommChild(subcomm); 4792 } 4793 /* flag to destroy *mat_n if not significative */ 4794 if (color) destroy_mat = PETSC_TRUE; 4795 } else { 4796 comm_n = comm; 4797 } 4798 4799 /* prepare send/receive buffers */ 4800 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4801 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4802 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4803 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4804 if (nis) { 4805 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4806 } 4807 4808 /* Get data from local matrices */ 4809 if (!isdense) { 4810 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4811 /* TODO: See below some guidelines on how to prepare the local buffers */ 4812 /* 4813 send_buffer_vals should contain the raw values of the local matrix 4814 send_buffer_idxs should contain: 4815 - MatType_PRIVATE type 4816 - PetscInt size_of_l2gmap 4817 - PetscInt global_row_indices[size_of_l2gmap] 4818 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4819 */ 4820 } else { 4821 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4822 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4823 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4824 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4825 send_buffer_idxs[1] = i; 4826 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4827 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4828 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4829 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4830 for (i=0;i<n_sends;i++) { 4831 ilengths_vals[is_indices[i]] = len*len; 4832 ilengths_idxs[is_indices[i]] = len+2; 4833 } 4834 } 4835 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4836 /* additional is (if any) */ 4837 if (nis) { 4838 PetscMPIInt psum; 4839 PetscInt j; 4840 for (j=0,psum=0;j<nis;j++) { 4841 PetscInt plen; 4842 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4843 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4844 psum += len+1; /* indices + lenght */ 4845 } 4846 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4847 for (j=0,psum=0;j<nis;j++) { 4848 PetscInt plen; 4849 const PetscInt *is_array_idxs; 4850 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4851 send_buffer_idxs_is[psum] = plen; 4852 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4853 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4854 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4855 psum += plen+1; /* indices + lenght */ 4856 } 4857 for (i=0;i<n_sends;i++) { 4858 ilengths_idxs_is[is_indices[i]] = psum; 4859 } 4860 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4861 } 4862 4863 buf_size_idxs = 0; 4864 buf_size_vals = 0; 4865 buf_size_idxs_is = 0; 4866 for (i=0;i<n_recvs;i++) { 4867 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4868 buf_size_vals += (PetscInt)olengths_vals[i]; 4869 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4870 } 4871 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4872 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4873 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4874 4875 /* get new tags for clean communications */ 4876 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4877 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4878 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4879 4880 /* allocate for requests */ 4881 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4882 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4883 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4884 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4885 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4886 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4887 4888 /* communications */ 4889 ptr_idxs = recv_buffer_idxs; 4890 ptr_vals = recv_buffer_vals; 4891 ptr_idxs_is = recv_buffer_idxs_is; 4892 for (i=0;i<n_recvs;i++) { 4893 source_dest = onodes[i]; 4894 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4895 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4896 ptr_idxs += olengths_idxs[i]; 4897 ptr_vals += olengths_vals[i]; 4898 if (nis) { 4899 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4900 ptr_idxs_is += olengths_idxs_is[i]; 4901 } 4902 } 4903 for (i=0;i<n_sends;i++) { 4904 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4905 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4906 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4907 if (nis) { 4908 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4909 } 4910 } 4911 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4912 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4913 4914 /* assemble new l2g map */ 4915 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4916 ptr_idxs = recv_buffer_idxs; 4917 new_local_rows = 0; 4918 for (i=0;i<n_recvs;i++) { 4919 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4920 ptr_idxs += olengths_idxs[i]; 4921 } 4922 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4923 ptr_idxs = recv_buffer_idxs; 4924 new_local_rows = 0; 4925 for (i=0;i<n_recvs;i++) { 4926 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4927 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4928 ptr_idxs += olengths_idxs[i]; 4929 } 4930 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4931 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4932 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4933 4934 /* infer new local matrix type from received local matrices type */ 4935 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4936 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4937 if (n_recvs) { 4938 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4939 ptr_idxs = recv_buffer_idxs; 4940 for (i=0;i<n_recvs;i++) { 4941 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4942 new_local_type_private = MATAIJ_PRIVATE; 4943 break; 4944 } 4945 ptr_idxs += olengths_idxs[i]; 4946 } 4947 switch (new_local_type_private) { 4948 case MATDENSE_PRIVATE: 4949 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4950 new_local_type = MATSEQAIJ; 4951 bs = 1; 4952 } else { /* if I receive only 1 dense matrix */ 4953 new_local_type = MATSEQDENSE; 4954 bs = 1; 4955 } 4956 break; 4957 case MATAIJ_PRIVATE: 4958 new_local_type = MATSEQAIJ; 4959 bs = 1; 4960 break; 4961 case MATBAIJ_PRIVATE: 4962 new_local_type = MATSEQBAIJ; 4963 break; 4964 case MATSBAIJ_PRIVATE: 4965 new_local_type = MATSEQSBAIJ; 4966 break; 4967 default: 4968 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4969 break; 4970 } 4971 } else { /* by default, new_local_type is seqdense */ 4972 new_local_type = MATSEQDENSE; 4973 bs = 1; 4974 } 4975 4976 /* create MATIS object if needed */ 4977 if (reuse == MAT_INITIAL_MATRIX) { 4978 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4979 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4980 } else { 4981 /* it also destroys the local matrices */ 4982 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4983 } 4984 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4985 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4986 4987 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4988 4989 /* Global to local map of received indices */ 4990 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4991 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4992 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4993 4994 /* restore attributes -> type of incoming data and its size */ 4995 buf_size_idxs = 0; 4996 for (i=0;i<n_recvs;i++) { 4997 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4998 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4999 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5000 } 5001 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5002 5003 /* set preallocation */ 5004 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5005 if (!newisdense) { 5006 PetscInt *new_local_nnz=0; 5007 5008 ptr_vals = recv_buffer_vals; 5009 ptr_idxs = recv_buffer_idxs_local; 5010 if (n_recvs) { 5011 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5012 } 5013 for (i=0;i<n_recvs;i++) { 5014 PetscInt j; 5015 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5016 for (j=0;j<*(ptr_idxs+1);j++) { 5017 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5018 } 5019 } else { 5020 /* TODO */ 5021 } 5022 ptr_idxs += olengths_idxs[i]; 5023 } 5024 if (new_local_nnz) { 5025 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5026 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5027 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5028 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5029 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5030 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5031 } else { 5032 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5033 } 5034 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5035 } else { 5036 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5037 } 5038 5039 /* set values */ 5040 ptr_vals = recv_buffer_vals; 5041 ptr_idxs = recv_buffer_idxs_local; 5042 for (i=0;i<n_recvs;i++) { 5043 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5044 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5045 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5046 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5047 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5048 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5049 } else { 5050 /* TODO */ 5051 } 5052 ptr_idxs += olengths_idxs[i]; 5053 ptr_vals += olengths_vals[i]; 5054 } 5055 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5056 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5057 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5058 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5059 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5060 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5061 5062 #if 0 5063 if (!restrict_comm) { /* check */ 5064 Vec lvec,rvec; 5065 PetscReal infty_error; 5066 5067 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5068 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5069 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5070 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5071 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5072 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5073 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5074 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5075 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5076 } 5077 #endif 5078 5079 /* assemble new additional is (if any) */ 5080 if (nis) { 5081 PetscInt **temp_idxs,*count_is,j,psum; 5082 5083 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5084 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5085 ptr_idxs = recv_buffer_idxs_is; 5086 psum = 0; 5087 for (i=0;i<n_recvs;i++) { 5088 for (j=0;j<nis;j++) { 5089 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5090 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5091 psum += plen; 5092 ptr_idxs += plen+1; /* shift pointer to received data */ 5093 } 5094 } 5095 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5096 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5097 for (i=1;i<nis;i++) { 5098 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5099 } 5100 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5101 ptr_idxs = recv_buffer_idxs_is; 5102 for (i=0;i<n_recvs;i++) { 5103 for (j=0;j<nis;j++) { 5104 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5105 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5106 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5107 ptr_idxs += plen+1; /* shift pointer to received data */ 5108 } 5109 } 5110 for (i=0;i<nis;i++) { 5111 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5112 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5113 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5114 } 5115 ierr = PetscFree(count_is);CHKERRQ(ierr); 5116 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5117 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5118 } 5119 /* free workspace */ 5120 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5121 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5122 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5123 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5124 if (isdense) { 5125 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5126 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5127 } else { 5128 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5129 } 5130 if (nis) { 5131 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5132 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5133 } 5134 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5135 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5136 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5137 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5138 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5139 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5140 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5141 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5142 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5143 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5144 ierr = PetscFree(onodes);CHKERRQ(ierr); 5145 if (nis) { 5146 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5147 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5148 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5149 } 5150 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5151 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5152 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5153 for (i=0;i<nis;i++) { 5154 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5155 } 5156 *mat_n = NULL; 5157 } 5158 PetscFunctionReturn(0); 5159 } 5160 5161 /* temporary hack into ksp private data structure */ 5162 #include <petsc/private/kspimpl.h> 5163 5164 #undef __FUNCT__ 5165 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5166 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5167 { 5168 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5169 PC_IS *pcis = (PC_IS*)pc->data; 5170 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5171 MatNullSpace CoarseNullSpace=NULL; 5172 ISLocalToGlobalMapping coarse_islg; 5173 IS coarse_is,*isarray; 5174 PetscInt i,im_active=-1,active_procs=-1; 5175 PetscInt nis,nisdofs,nisneu,nisvert; 5176 PC pc_temp; 5177 PCType coarse_pc_type; 5178 KSPType coarse_ksp_type; 5179 PetscBool multilevel_requested,multilevel_allowed; 5180 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5181 Mat t_coarse_mat_is; 5182 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5183 PetscMPIInt all_procs; 5184 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5185 PetscBool compute_vecs = PETSC_FALSE; 5186 PetscScalar *array; 5187 PetscErrorCode ierr; 5188 5189 PetscFunctionBegin; 5190 /* Assign global numbering to coarse dofs */ 5191 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5192 PetscInt ocoarse_size; 5193 compute_vecs = PETSC_TRUE; 5194 ocoarse_size = pcbddc->coarse_size; 5195 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5196 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5197 /* see if we can avoid some work */ 5198 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5199 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5200 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5201 PC pc; 5202 PetscBool isbddc; 5203 5204 /* temporary workaround since PCBDDC does not have a reset method so far */ 5205 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5206 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5207 if (isbddc) { 5208 ierr = PCDestroy(&pc);CHKERRQ(ierr); 5209 } 5210 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5211 coarse_reuse = PETSC_FALSE; 5212 } else { /* we can safely reuse already computed coarse matrix */ 5213 coarse_reuse = PETSC_TRUE; 5214 } 5215 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5216 coarse_reuse = PETSC_FALSE; 5217 } 5218 /* reset any subassembling information */ 5219 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5220 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5221 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5222 coarse_reuse = PETSC_TRUE; 5223 } 5224 5225 /* count "active" (i.e. with positive local size) and "void" processes */ 5226 im_active = !!(pcis->n); 5227 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5228 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5229 void_procs = all_procs-active_procs; 5230 csin_type_simple = PETSC_TRUE; 5231 redist = PETSC_FALSE; 5232 if (pcbddc->current_level && void_procs) { 5233 csin_ml = PETSC_TRUE; 5234 ncoarse_ml = void_procs; 5235 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5236 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5237 csin_ds = PETSC_TRUE; 5238 ncoarse_ds = pcbddc->redistribute_coarse; 5239 redist = PETSC_TRUE; 5240 } else { 5241 csin_ds = PETSC_TRUE; 5242 ncoarse_ds = active_procs; 5243 redist = PETSC_TRUE; 5244 } 5245 } else { 5246 csin_ml = PETSC_FALSE; 5247 ncoarse_ml = all_procs; 5248 if (void_procs) { 5249 csin_ds = PETSC_TRUE; 5250 ncoarse_ds = void_procs; 5251 csin_type_simple = PETSC_FALSE; 5252 } else { 5253 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5254 csin_ds = PETSC_TRUE; 5255 ncoarse_ds = pcbddc->redistribute_coarse; 5256 redist = PETSC_TRUE; 5257 } else { 5258 csin_ds = PETSC_FALSE; 5259 ncoarse_ds = all_procs; 5260 } 5261 } 5262 } 5263 5264 /* 5265 test if we can go multilevel: three conditions must be satisfied: 5266 - we have not exceeded the number of levels requested 5267 - we can actually subassemble the active processes 5268 - we can find a suitable number of MPI processes where we can place the subassembled problem 5269 */ 5270 multilevel_allowed = PETSC_FALSE; 5271 multilevel_requested = PETSC_FALSE; 5272 if (pcbddc->current_level < pcbddc->max_levels) { 5273 multilevel_requested = PETSC_TRUE; 5274 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5275 multilevel_allowed = PETSC_FALSE; 5276 } else { 5277 multilevel_allowed = PETSC_TRUE; 5278 } 5279 } 5280 /* determine number of process partecipating to coarse solver */ 5281 if (multilevel_allowed) { 5282 ncoarse = ncoarse_ml; 5283 csin = csin_ml; 5284 redist = PETSC_FALSE; 5285 } else { 5286 ncoarse = ncoarse_ds; 5287 csin = csin_ds; 5288 } 5289 5290 /* creates temporary l2gmap and IS for coarse indexes */ 5291 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5292 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5293 5294 /* creates temporary MATIS object for coarse matrix */ 5295 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5296 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5297 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5298 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5299 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5300 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5301 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5302 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5303 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5304 5305 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5306 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5307 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5308 const PetscInt *idxs; 5309 ISLocalToGlobalMapping tmap; 5310 5311 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5312 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5313 /* allocate space for temporary storage */ 5314 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5315 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5316 /* allocate for IS array */ 5317 nisdofs = pcbddc->n_ISForDofsLocal; 5318 nisneu = !!pcbddc->NeumannBoundariesLocal; 5319 nisvert = 0; 5320 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5321 nisvert = 1; 5322 } 5323 nis = nisdofs + nisneu + nisvert; 5324 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5325 /* dofs splitting */ 5326 for (i=0;i<nisdofs;i++) { 5327 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5328 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5329 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5330 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5331 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5332 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5333 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5334 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5335 } 5336 /* neumann boundaries */ 5337 if (pcbddc->NeumannBoundariesLocal) { 5338 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5339 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5340 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5341 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5342 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5343 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5344 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5345 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5346 } 5347 /* primal vertices (benign) */ 5348 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5349 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5350 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5351 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5352 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5353 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5354 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5355 } 5356 /* free memory */ 5357 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5358 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5359 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5360 } else { 5361 nis = 0; 5362 nisdofs = 0; 5363 nisneu = 0; 5364 nisvert = 0; 5365 isarray = NULL; 5366 } 5367 /* destroy no longer needed map */ 5368 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5369 5370 /* restrict on coarse candidates (if needed) */ 5371 coarse_mat_is = NULL; 5372 if (csin) { 5373 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5374 if (redist) { 5375 PetscMPIInt rank; 5376 PetscInt spc,n_spc_p1,dest[1],destsize; 5377 5378 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5379 spc = active_procs/ncoarse; 5380 n_spc_p1 = active_procs%ncoarse; 5381 if (im_active) { 5382 destsize = 1; 5383 if (rank > n_spc_p1*(spc+1)-1) { 5384 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5385 } else { 5386 dest[0] = rank/(spc+1); 5387 } 5388 } else { 5389 destsize = 0; 5390 } 5391 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5392 } else if (csin_type_simple) { 5393 PetscMPIInt rank; 5394 PetscInt issize,isidx; 5395 5396 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5397 if (im_active) { 5398 issize = 1; 5399 isidx = (PetscInt)rank; 5400 } else { 5401 issize = 0; 5402 isidx = -1; 5403 } 5404 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5405 } else { /* get a suitable subassembling pattern from MATIS code */ 5406 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5407 } 5408 5409 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5410 if (!redist || ncoarse <= void_procs) { 5411 PetscInt ncoarse_cand,tissize,*nisindices; 5412 PetscInt *coarse_candidates; 5413 const PetscInt* tisindices; 5414 5415 /* get coarse candidates' ranks in pc communicator */ 5416 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5417 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5418 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5419 if (!coarse_candidates[i]) { 5420 coarse_candidates[ncoarse_cand++]=i; 5421 } 5422 } 5423 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5424 5425 5426 if (pcbddc->dbg_flag) { 5427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5429 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5431 for (i=0;i<ncoarse_cand;i++) { 5432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5433 } 5434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5436 } 5437 /* shift the pattern on coarse candidates */ 5438 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5439 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5440 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5441 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5442 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5443 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5444 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5445 } 5446 if (pcbddc->dbg_flag) { 5447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5449 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5451 } 5452 } 5453 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5454 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5455 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5456 } else { /* this is the last level, so use just receiving processes in subcomm */ 5457 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5458 } 5459 } else { 5460 if (pcbddc->dbg_flag) { 5461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5464 } 5465 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5466 coarse_mat_is = t_coarse_mat_is; 5467 } 5468 5469 /* create local to global scatters for coarse problem */ 5470 if (compute_vecs) { 5471 PetscInt lrows; 5472 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5473 if (coarse_mat_is) { 5474 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5475 } else { 5476 lrows = 0; 5477 } 5478 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5479 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5480 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5481 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5482 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5483 } 5484 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5485 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5486 5487 /* set defaults for coarse KSP and PC */ 5488 if (multilevel_allowed) { 5489 coarse_ksp_type = KSPRICHARDSON; 5490 coarse_pc_type = PCBDDC; 5491 } else { 5492 coarse_ksp_type = KSPPREONLY; 5493 coarse_pc_type = PCREDUNDANT; 5494 } 5495 5496 /* print some info if requested */ 5497 if (pcbddc->dbg_flag) { 5498 if (!multilevel_allowed) { 5499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5500 if (multilevel_requested) { 5501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5502 } else if (pcbddc->max_levels) { 5503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5504 } 5505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5506 } 5507 } 5508 5509 /* create the coarse KSP object only once with defaults */ 5510 if (coarse_mat_is) { 5511 MatReuse coarse_mat_reuse; 5512 PetscViewer dbg_viewer = NULL; 5513 if (pcbddc->dbg_flag) { 5514 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5515 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5516 } 5517 if (!pcbddc->coarse_ksp) { 5518 char prefix[256],str_level[16]; 5519 size_t len; 5520 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5521 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5522 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5523 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5524 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5525 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5526 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5527 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5528 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5529 /* prefix */ 5530 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5531 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5532 if (!pcbddc->current_level) { 5533 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5534 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5535 } else { 5536 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5537 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5538 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5539 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5540 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5541 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5542 } 5543 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5544 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5545 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5546 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5547 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5548 /* allow user customization */ 5549 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5550 } 5551 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5552 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5553 if (nisdofs) { 5554 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5555 for (i=0;i<nisdofs;i++) { 5556 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5557 } 5558 } 5559 if (nisneu) { 5560 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5561 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5562 } 5563 if (nisvert) { 5564 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5565 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5566 } 5567 5568 /* get some info after set from options */ 5569 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5570 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5571 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5572 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5573 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5574 isbddc = PETSC_FALSE; 5575 } 5576 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5577 if (isredundant) { 5578 KSP inner_ksp; 5579 PC inner_pc; 5580 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5581 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5582 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5583 } 5584 5585 /* assemble coarse matrix */ 5586 if (coarse_reuse) { 5587 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5588 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5589 coarse_mat_reuse = MAT_REUSE_MATRIX; 5590 } else { 5591 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5592 } 5593 if (isbddc || isnn) { 5594 if (pcbddc->coarsening_ratio > 1) { 5595 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5596 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5597 if (pcbddc->dbg_flag) { 5598 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5599 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5600 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5601 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5602 } 5603 } 5604 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5605 } else { 5606 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5607 coarse_mat = coarse_mat_is; 5608 } 5609 } else { 5610 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5611 } 5612 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5613 5614 /* propagate symmetry info of coarse matrix */ 5615 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5616 if (pc->pmat->symmetric_set) { 5617 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5618 } 5619 if (pc->pmat->hermitian_set) { 5620 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5621 } 5622 if (pc->pmat->spd_set) { 5623 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5624 } 5625 /* set operators */ 5626 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5627 if (pcbddc->dbg_flag) { 5628 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5629 } 5630 } else { /* processes non partecipating to coarse solver (if any) */ 5631 coarse_mat = 0; 5632 } 5633 ierr = PetscFree(isarray);CHKERRQ(ierr); 5634 #if 0 5635 { 5636 PetscViewer viewer; 5637 char filename[256]; 5638 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5639 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5640 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5641 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5642 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5643 } 5644 #endif 5645 5646 /* Compute coarse null space (special handling by BDDC only) */ 5647 #if 0 5648 if (pcbddc->NullSpace) { 5649 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5650 } 5651 #endif 5652 /* hack */ 5653 if (pcbddc->coarse_ksp) { 5654 Vec crhs,csol; 5655 5656 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5657 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5658 if (!csol) { 5659 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5660 } 5661 if (!crhs) { 5662 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5663 } 5664 } 5665 5666 /* compute null space for coarse solver if the benign trick has been requested */ 5667 if (pcbddc->benign_null) { 5668 5669 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5670 for (i=0;i<pcbddc->benign_n;i++) { 5671 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5672 } 5673 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5674 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5675 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5676 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5677 if (coarse_mat) { 5678 Vec nullv; 5679 PetscScalar *array,*array2; 5680 PetscInt nl; 5681 5682 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5683 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5684 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5685 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5686 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5687 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5688 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5689 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5690 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5691 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5692 } 5693 } 5694 5695 if (pcbddc->coarse_ksp) { 5696 PetscBool ispreonly; 5697 5698 if (CoarseNullSpace) { 5699 PetscBool isnull; 5700 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5701 if (isnull) { 5702 if (isbddc && !pcbddc->benign_saddle_point) { 5703 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5704 } else { 5705 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5706 } 5707 } else { 5708 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5709 } 5710 } 5711 /* setup coarse ksp */ 5712 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5713 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5714 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5715 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5716 KSP check_ksp; 5717 KSPType check_ksp_type; 5718 PC check_pc; 5719 Vec check_vec,coarse_vec; 5720 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5721 PetscInt its; 5722 PetscBool compute_eigs; 5723 PetscReal *eigs_r,*eigs_c; 5724 PetscInt neigs; 5725 const char *prefix; 5726 5727 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5728 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5729 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5730 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5731 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5732 if (ispreonly) { 5733 check_ksp_type = KSPPREONLY; 5734 compute_eigs = PETSC_FALSE; 5735 } else { 5736 check_ksp_type = KSPGMRES; 5737 compute_eigs = PETSC_TRUE; 5738 } 5739 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5740 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5741 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5742 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5743 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5744 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5745 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5746 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5747 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5748 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5749 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5750 /* create random vec */ 5751 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5752 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5753 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5754 if (CoarseNullSpace) { 5755 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5756 } 5757 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5758 /* solve coarse problem */ 5759 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5760 if (CoarseNullSpace) { 5761 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5762 } 5763 /* set eigenvalue estimation if preonly has not been requested */ 5764 if (compute_eigs) { 5765 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5766 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5767 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5768 lambda_max = eigs_r[neigs-1]; 5769 lambda_min = eigs_r[0]; 5770 if (pcbddc->use_coarse_estimates) { 5771 if (lambda_max>lambda_min) { 5772 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5773 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5774 } 5775 } 5776 } 5777 5778 /* check coarse problem residual error */ 5779 if (pcbddc->dbg_flag) { 5780 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5781 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5782 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5783 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5784 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5785 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5786 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5787 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5788 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5789 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5790 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5791 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5792 if (CoarseNullSpace) { 5793 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5794 } 5795 if (compute_eigs) { 5796 PetscReal lambda_max_s,lambda_min_s; 5797 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5798 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5799 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5800 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5801 for (i=0;i<neigs;i++) { 5802 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5803 } 5804 } 5805 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5806 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5807 } 5808 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5809 if (compute_eigs) { 5810 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5811 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5812 } 5813 } 5814 } 5815 /* print additional info */ 5816 if (pcbddc->dbg_flag) { 5817 /* waits until all processes reaches this point */ 5818 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5819 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5820 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5821 } 5822 5823 /* free memory */ 5824 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5825 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5826 PetscFunctionReturn(0); 5827 } 5828 5829 #undef __FUNCT__ 5830 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5831 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5832 { 5833 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5834 PC_IS* pcis = (PC_IS*)pc->data; 5835 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5836 IS subset,subset_mult,subset_n; 5837 PetscInt local_size,coarse_size=0; 5838 PetscInt *local_primal_indices=NULL; 5839 const PetscInt *t_local_primal_indices; 5840 PetscErrorCode ierr; 5841 5842 PetscFunctionBegin; 5843 /* Compute global number of coarse dofs */ 5844 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5845 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5846 } 5847 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5848 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5849 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5850 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5851 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5852 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5853 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5854 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5855 if (local_size != pcbddc->local_primal_size) { 5856 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5857 } 5858 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5859 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5860 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5861 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5862 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5863 5864 /* check numbering */ 5865 if (pcbddc->dbg_flag) { 5866 PetscScalar coarsesum,*array,*array2; 5867 PetscInt i; 5868 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5869 5870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5871 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5872 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5873 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5874 /* counter */ 5875 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5876 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5877 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5878 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5879 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5880 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5881 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5882 for (i=0;i<pcbddc->local_primal_size;i++) { 5883 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5884 } 5885 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5886 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5887 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5888 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5889 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5890 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5891 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5892 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5893 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5894 for (i=0;i<pcis->n;i++) { 5895 if (array[i] != 0.0 && array[i] != array2[i]) { 5896 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5897 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5898 set_error = PETSC_TRUE; 5899 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5900 } 5901 } 5902 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5903 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5905 for (i=0;i<pcis->n;i++) { 5906 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5907 } 5908 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5909 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5910 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5911 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5912 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5914 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5915 PetscInt *gidxs; 5916 5917 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5918 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5919 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5920 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5922 for (i=0;i<pcbddc->local_primal_size;i++) { 5923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5924 } 5925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5926 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5927 } 5928 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5929 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5930 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5931 } 5932 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5933 /* get back data */ 5934 *coarse_size_n = coarse_size; 5935 *local_primal_indices_n = local_primal_indices; 5936 PetscFunctionReturn(0); 5937 } 5938 5939 #undef __FUNCT__ 5940 #define __FUNCT__ "PCBDDCGlobalToLocal" 5941 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5942 { 5943 IS localis_t; 5944 PetscInt i,lsize,*idxs,n; 5945 PetscScalar *vals; 5946 PetscErrorCode ierr; 5947 5948 PetscFunctionBegin; 5949 /* get indices in local ordering exploiting local to global map */ 5950 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5951 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5952 for (i=0;i<lsize;i++) vals[i] = 1.0; 5953 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5954 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5955 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5956 if (idxs) { /* multilevel guard */ 5957 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5958 } 5959 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5960 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5961 ierr = PetscFree(vals);CHKERRQ(ierr); 5962 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5963 /* now compute set in local ordering */ 5964 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5965 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5966 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5967 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5968 for (i=0,lsize=0;i<n;i++) { 5969 if (PetscRealPart(vals[i]) > 0.5) { 5970 lsize++; 5971 } 5972 } 5973 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5974 for (i=0,lsize=0;i<n;i++) { 5975 if (PetscRealPart(vals[i]) > 0.5) { 5976 idxs[lsize++] = i; 5977 } 5978 } 5979 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5980 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5981 *localis = localis_t; 5982 PetscFunctionReturn(0); 5983 } 5984 5985 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5986 #undef __FUNCT__ 5987 #define __FUNCT__ "PCBDDCMatMult_Private" 5988 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5989 { 5990 PCBDDCChange_ctx change_ctx; 5991 PetscErrorCode ierr; 5992 5993 PetscFunctionBegin; 5994 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5995 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5996 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5997 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5998 PetscFunctionReturn(0); 5999 } 6000 6001 #undef __FUNCT__ 6002 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6003 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6004 { 6005 PCBDDCChange_ctx change_ctx; 6006 PetscErrorCode ierr; 6007 6008 PetscFunctionBegin; 6009 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6010 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6011 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6012 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6013 PetscFunctionReturn(0); 6014 } 6015 6016 #undef __FUNCT__ 6017 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6018 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6019 { 6020 PC_IS *pcis=(PC_IS*)pc->data; 6021 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6022 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6023 Mat S_j; 6024 PetscInt *used_xadj,*used_adjncy; 6025 PetscBool free_used_adj; 6026 PetscErrorCode ierr; 6027 6028 PetscFunctionBegin; 6029 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6030 free_used_adj = PETSC_FALSE; 6031 if (pcbddc->sub_schurs_layers == -1) { 6032 used_xadj = NULL; 6033 used_adjncy = NULL; 6034 } else { 6035 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6036 used_xadj = pcbddc->mat_graph->xadj; 6037 used_adjncy = pcbddc->mat_graph->adjncy; 6038 } else if (pcbddc->computed_rowadj) { 6039 used_xadj = pcbddc->mat_graph->xadj; 6040 used_adjncy = pcbddc->mat_graph->adjncy; 6041 } else { 6042 PetscBool flg_row=PETSC_FALSE; 6043 const PetscInt *xadj,*adjncy; 6044 PetscInt nvtxs; 6045 6046 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6047 if (flg_row) { 6048 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6049 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6050 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6051 free_used_adj = PETSC_TRUE; 6052 } else { 6053 pcbddc->sub_schurs_layers = -1; 6054 used_xadj = NULL; 6055 used_adjncy = NULL; 6056 } 6057 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6058 } 6059 } 6060 6061 /* setup sub_schurs data */ 6062 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6063 if (!sub_schurs->use_mumps) { 6064 /* pcbddc->ksp_D up to date only if not using MUMPS */ 6065 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6066 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6067 } else { 6068 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6069 PetscBool isseqaij; 6070 PetscInt benign_n; 6071 6072 if (!pcbddc->use_vertices && reuse_solvers) { 6073 PetscInt n_vertices; 6074 6075 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6076 reuse_solvers = (PetscBool)!n_vertices; 6077 } 6078 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6079 if (!isseqaij) { 6080 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6081 if (matis->A == pcbddc->local_mat) { 6082 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6083 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6084 } else { 6085 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6086 } 6087 } 6088 if (!pcbddc->benign_change_explicit) { 6089 benign_n = pcbddc->benign_n; 6090 } else { 6091 benign_n = 0; 6092 } 6093 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6094 } 6095 6096 /* free adjacency */ 6097 if (free_used_adj) { 6098 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6099 } 6100 PetscFunctionReturn(0); 6101 } 6102 6103 #undef __FUNCT__ 6104 #define __FUNCT__ "PCBDDCInitSubSchurs" 6105 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6106 { 6107 PC_IS *pcis=(PC_IS*)pc->data; 6108 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6109 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6110 PCBDDCGraph graph; 6111 PetscErrorCode ierr; 6112 6113 PetscFunctionBegin; 6114 /* attach interface graph for determining subsets */ 6115 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6116 IS verticesIS,verticescomm; 6117 PetscInt vsize,*idxs; 6118 6119 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6120 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6121 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6122 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6123 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6124 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6125 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6126 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6127 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6128 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6129 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6130 /* 6131 if (pcbddc->dbg_flag) { 6132 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6133 } 6134 */ 6135 } else { 6136 graph = pcbddc->mat_graph; 6137 } 6138 6139 /* sub_schurs init */ 6140 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6141 6142 /* free graph struct */ 6143 if (pcbddc->sub_schurs_rebuild) { 6144 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6145 } 6146 PetscFunctionReturn(0); 6147 } 6148 6149 #undef __FUNCT__ 6150 #define __FUNCT__ "PCBDDCCheckOperator" 6151 PetscErrorCode PCBDDCCheckOperator(PC pc) 6152 { 6153 PC_IS *pcis=(PC_IS*)pc->data; 6154 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6155 PetscErrorCode ierr; 6156 6157 PetscFunctionBegin; 6158 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6159 IS zerodiag = NULL; 6160 Mat S_j,B0_B=NULL; 6161 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6162 PetscScalar *p0_check,*array,*array2; 6163 PetscReal norm; 6164 PetscInt i; 6165 6166 /* B0 and B0_B */ 6167 if (zerodiag) { 6168 IS dummy; 6169 6170 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6171 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6172 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6173 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6174 } 6175 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6176 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6177 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6178 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6179 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6180 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6181 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6182 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6183 /* S_j */ 6184 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6185 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6186 6187 /* mimic vector in \widetilde{W}_\Gamma */ 6188 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6189 /* continuous in primal space */ 6190 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6191 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6192 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6193 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6194 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6195 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6196 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6197 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6198 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6199 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6200 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6201 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6202 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6203 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6204 6205 /* assemble rhs for coarse problem */ 6206 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6207 /* local with Schur */ 6208 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6209 if (zerodiag) { 6210 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6211 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6212 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6213 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6214 } 6215 /* sum on primal nodes the local contributions */ 6216 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6217 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6218 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6219 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6220 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6221 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6222 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6223 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6224 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6225 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6226 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6227 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6228 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6229 /* scale primal nodes (BDDC sums contibutions) */ 6230 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6231 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6232 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6233 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6234 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6235 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6236 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6237 /* global: \widetilde{B0}_B w_\Gamma */ 6238 if (zerodiag) { 6239 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6240 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6241 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6242 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6243 } 6244 /* BDDC */ 6245 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6246 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6247 6248 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6249 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6250 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6251 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6252 for (i=0;i<pcbddc->benign_n;i++) { 6253 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6254 } 6255 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6256 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6257 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6258 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6259 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6260 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6261 } 6262 PetscFunctionReturn(0); 6263 } 6264