1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 if (!reuse) { 138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot setup shell matrices!"); 139 } 140 141 /* local mat */ 142 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 143 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 144 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 146 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 147 ierr = PetscNew(&ctx);CHKERRQ(ierr); 148 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 149 ctx->apply_left = PETSC_TRUE; 150 ctx->apply_right = PETSC_TRUE; 151 ctx->apply_p0 = PETSC_TRUE; 152 ctx->benign_n = pcbddc->benign_n; 153 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 154 ctx->A = matis->A; 155 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 156 ctx->work = work; 157 ierr = MatSetUp(A);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 matis->A = A; 161 162 /* A_IB */ 163 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 164 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 165 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 166 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 168 ierr = PetscNew(&ctx);CHKERRQ(ierr); 169 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 170 ctx->apply_left = PETSC_TRUE; 171 ctx->apply_right = PETSC_FALSE; 172 ctx->apply_p0 = PETSC_FALSE; 173 ctx->benign_n = pcbddc->benign_n; 174 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 175 ctx->A = pcis->A_IB; 176 ctx->work = work; 177 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 178 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 pcis->A_IB = A_IB; 181 182 /* A_BI as A_IB^T */ 183 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 184 pcbddc->benign_original_mat = pcis->A_BI; 185 pcis->A_BI = A_BI; 186 } else { 187 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 188 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 189 matis->A = ctx->A; 190 ierr = PetscFree(ctx);CHKERRQ(ierr); 191 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 192 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 193 pcis->A_IB = ctx->A; 194 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 195 ierr = PetscFree(ctx);CHKERRQ(ierr); 196 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 197 pcis->A_BI = pcbddc->benign_original_mat; 198 pcbddc->benign_original_mat = NULL; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 /* used just in bddc debug mode */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCBDDCBenignProject" 206 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 207 { 208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 209 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 210 Mat An; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 215 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 216 if (is1) { 217 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 218 ierr = MatDestroy(&An);CHKERRQ(ierr); 219 } else { 220 *B = An; 221 } 222 PetscFunctionReturn(0); 223 } 224 225 /* TODO: add reuse flag */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSeqAIJCompress" 228 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 229 { 230 Mat Bt; 231 PetscScalar *a,*bdata; 232 const PetscInt *ii,*ij; 233 PetscInt m,n,i,nnz,*bii,*bij; 234 PetscBool flg_row; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 239 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 240 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 241 nnz = n; 242 for (i=0;i<ii[n];i++) { 243 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 244 } 245 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 247 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 248 nnz = 0; 249 bii[0] = 0; 250 for (i=0;i<n;i++) { 251 PetscInt j; 252 for (j=ii[i];j<ii[i+1];j++) { 253 PetscScalar entry = a[j]; 254 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 255 bij[nnz] = ij[j]; 256 bdata[nnz] = entry; 257 nnz++; 258 } 259 } 260 bii[i+1] = nnz; 261 } 262 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 263 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 264 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 265 { 266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 267 b->free_a = PETSC_TRUE; 268 b->free_ij = PETSC_TRUE; 269 } 270 *B = Bt; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDetectDisconnectedComponents" 276 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 277 { 278 Mat B; 279 IS is_dummy,*cc_n; 280 ISLocalToGlobalMapping l2gmap_dummy; 281 PCBDDCGraph graph; 282 PetscInt i,n; 283 PetscInt *xadj,*adjncy; 284 PetscInt *xadj_filtered,*adjncy_filtered; 285 PetscBool flg_row,isseqaij; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 290 if (!isseqaij && filter) { 291 PetscBool isseqdense; 292 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 294 if (!isseqdense) { 295 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 296 } else { /* TODO: rectangular case and LDA */ 297 PetscScalar *array; 298 PetscReal chop=1.e-6; 299 300 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 301 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 302 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 303 for (i=0;i<n;i++) { 304 PetscInt j; 305 for (j=i+1;j<n;j++) { 306 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 307 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 308 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 309 } 310 } 311 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 312 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 313 } 314 } else { 315 B = A; 316 } 317 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 318 319 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 320 if (filter) { 321 PetscScalar *data; 322 PetscInt j,cum; 323 324 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 325 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 326 cum = 0; 327 for (i=0;i<n;i++) { 328 PetscInt t; 329 330 for (j=xadj[i];j<xadj[i+1];j++) { 331 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 332 continue; 333 } 334 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 335 } 336 t = xadj_filtered[i]; 337 xadj_filtered[i] = cum; 338 cum += t; 339 } 340 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 341 } else { 342 xadj_filtered = NULL; 343 adjncy_filtered = NULL; 344 } 345 346 /* compute local connected components using PCBDDCGraph */ 347 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 349 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 350 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 351 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 353 if (xadj_filtered) { 354 graph->xadj = xadj_filtered; 355 graph->adjncy = adjncy_filtered; 356 } else { 357 graph->xadj = xadj; 358 graph->adjncy = adjncy; 359 } 360 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 361 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 362 /* partial clean up */ 363 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 364 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 365 if (A != B) { 366 ierr = MatDestroy(&B);CHKERRQ(ierr); 367 } 368 369 /* get back data */ 370 if (ncc) *ncc = graph->ncc; 371 if (cc) { 372 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 373 for (i=0;i<graph->ncc;i++) { 374 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 375 } 376 *cc = cc_n; 377 } 378 /* clean up graph */ 379 graph->xadj = 0; 380 graph->adjncy = 0; 381 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCBenignCheck" 387 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 388 { 389 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 390 PC_IS* pcis = (PC_IS*)(pc->data); 391 IS dirIS = NULL; 392 PetscInt i; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 397 if (zerodiag) { 398 Mat A; 399 Vec vec3_N; 400 PetscScalar *vals; 401 const PetscInt *idxs; 402 PetscInt nz,*count; 403 404 /* p0 */ 405 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 406 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 407 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 408 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 409 for (i=0;i<nz;i++) vals[i] = 1.; 410 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 412 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 413 /* v_I */ 414 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 415 for (i=0;i<nz;i++) vals[i] = 0.; 416 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 417 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 419 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 if (dirIS) { 423 PetscInt n; 424 425 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 426 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 427 for (i=0;i<n;i++) vals[i] = 0.; 428 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 430 } 431 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 433 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 434 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 435 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 436 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 437 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 438 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 439 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 440 } 441 ierr = PetscFree(vals);CHKERRQ(ierr); 442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 443 444 /* there should not be any pressure dofs lying on the interface */ 445 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 446 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 447 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 448 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 449 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 450 if (count[idxs[i]]) { 451 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 452 } 453 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 ierr = PetscFree(count);CHKERRQ(ierr); 455 } 456 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 457 458 /* check PCBDDCBenignGetOrSetP0 */ 459 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 460 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 461 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 462 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 463 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 464 for (i=0;i<pcbddc->benign_n;i++) { 465 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 466 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 467 } 468 } 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 474 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 475 { 476 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 477 IS pressures,zerodiag,*zerodiag_subs; 478 PetscInt nz,n; 479 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 484 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 485 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 486 for (n=0;n<pcbddc->benign_n;n++) { 487 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 488 } 489 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 490 pcbddc->benign_n = 0; 491 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 492 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 493 Checks if all the pressure dofs in each subdomain have a zero diagonal 494 If not, a change of basis on pressures is not needed 495 since the local Schur complements are SPD 496 */ 497 has_null_pressures = PETSC_TRUE; 498 have_null = PETSC_TRUE; 499 if (pcbddc->n_ISForDofsLocal) { 500 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 501 502 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 503 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 504 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 505 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 506 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 507 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 508 if (!sorted) { 509 ierr = ISSort(pressures);CHKERRQ(ierr); 510 } 511 } else { 512 pressures = NULL; 513 } 514 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 515 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 516 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 517 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 518 if (!sorted) { 519 ierr = ISSort(zerodiag);CHKERRQ(ierr); 520 } 521 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 522 if (!nz) { 523 if (n) have_null = PETSC_FALSE; 524 has_null_pressures = PETSC_FALSE; 525 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 526 } 527 recompute_zerodiag = PETSC_FALSE; 528 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 529 zerodiag_subs = NULL; 530 pcbddc->benign_n = 0; 531 if (has_null_pressures) { 532 IS *subs; 533 PetscInt nsubs,i; 534 535 subs = pcbddc->local_subs; 536 nsubs = pcbddc->n_local_subs; 537 if (nsubs > 1) { 538 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 539 for (i=0;i<nsubs;i++) { 540 ISLocalToGlobalMapping l2g; 541 IS t_zerodiag_subs; 542 PetscInt nl; 543 544 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 545 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 546 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 547 if (nl) { 548 PetscBool valid = PETSC_TRUE; 549 550 if (pressures) { 551 IS t_pressure_subs; 552 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 553 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 554 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 555 } 556 if (valid) { 557 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 558 pcbddc->benign_n++; 559 } else { 560 recompute_zerodiag = PETSC_TRUE; 561 } 562 } 563 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 564 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 565 } 566 } else { /* there's just one subdomain (or zero if they have not been detected */ 567 PetscBool valid = PETSC_TRUE; 568 if (pressures) { 569 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 570 } 571 if (valid) { 572 pcbddc->benign_n = 1; 573 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 574 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 575 zerodiag_subs[0] = zerodiag; 576 } 577 } 578 } 579 580 if (!pcbddc->benign_n) { 581 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 582 recompute_zerodiag = PETSC_FALSE; 583 has_null_pressures = PETSC_FALSE; 584 have_null = PETSC_FALSE; 585 } 586 587 /* final check for null pressures */ 588 if (zerodiag && pressures) { 589 PetscInt nz,np; 590 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 591 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 592 if (nz != np) have_null = PETSC_FALSE; 593 } 594 595 if (recompute_zerodiag) { 596 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 597 if (pcbddc->benign_n == 1) { 598 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 599 zerodiag = zerodiag_subs[0]; 600 } else { 601 PetscInt i,nzn,*new_idxs; 602 603 nzn = 0; 604 for (i=0;i<pcbddc->benign_n;i++) { 605 PetscInt ns; 606 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 607 nzn += ns; 608 } 609 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 610 nzn = 0; 611 for (i=0;i<pcbddc->benign_n;i++) { 612 PetscInt ns,*idxs; 613 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 614 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 615 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 616 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 617 nzn += ns; 618 } 619 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 620 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 621 } 622 have_null = PETSC_FALSE; 623 } 624 625 if (has_null_pressures) { 626 IS zerodiagc; 627 const PetscInt *idxs,*idxsc; 628 PetscInt i,s,*nnz; 629 630 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 631 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 632 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 633 /* local change of basis for pressures */ 634 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 635 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 636 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 637 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 638 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 639 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 640 for (i=0;i<pcbddc->benign_n;i++) { 641 PetscInt nzs,j; 642 643 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 644 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 645 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 646 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 647 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 648 } 649 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 650 ierr = PetscFree(nnz);CHKERRQ(ierr); 651 /* set identity on velocities */ 652 for (i=0;i<n-nz;i++) { 653 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 654 } 655 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 656 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 657 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 658 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 659 /* set change on pressures */ 660 for (s=0;s<pcbddc->benign_n;s++) { 661 PetscScalar *array; 662 PetscInt nzs; 663 664 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 665 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 666 for (i=0;i<nzs-1;i++) { 667 PetscScalar vals[2]; 668 PetscInt cols[2]; 669 670 cols[0] = idxs[i]; 671 cols[1] = idxs[nzs-1]; 672 vals[0] = 1.; 673 vals[1] = 1.; 674 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 675 } 676 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 677 for (i=0;i<nzs-1;i++) array[i] = -1.; 678 array[nzs-1] = 1.; 679 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 680 /* store local idxs for p0 */ 681 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 682 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 683 ierr = PetscFree(array);CHKERRQ(ierr); 684 } 685 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 686 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 687 /* project if needed */ 688 if (pcbddc->benign_change_explicit) { 689 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 690 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 691 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 692 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 693 } 694 /* store global idxs for p0 */ 695 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 696 } 697 pcbddc->benign_zerodiag_subs = zerodiag_subs; 698 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 699 700 /* determines if the coarse solver will be singular or not */ 701 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 702 *zerodiaglocal = zerodiag; 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 708 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 709 { 710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 711 PetscErrorCode ierr; 712 713 PetscFunctionBegin; 714 if (!pcbddc->benign_sf) { 715 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 716 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 717 } 718 if (get) { /* use SF to get values */ 719 PetscScalar *array; 720 721 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 722 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 723 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 724 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 725 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 726 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 727 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 728 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 729 } 730 PetscFunctionReturn(0); 731 } 732 733 #undef __FUNCT__ 734 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 735 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 736 { 737 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 /* TODO: add error checking 742 - avoid nested pop (or push) calls. 743 - cannot push before pop. 744 - cannot call this if pcbddc->local_mat is NULL 745 */ 746 if (!pcbddc->benign_n) { 747 PetscFunctionReturn(0); 748 } 749 if (pop) { 750 if (pcbddc->benign_change_explicit) { 751 IS is_p0; 752 MatReuse reuse; 753 754 /* extract B_0 */ 755 reuse = MAT_INITIAL_MATRIX; 756 if (pcbddc->benign_B0) { 757 reuse = MAT_REUSE_MATRIX; 758 } 759 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 760 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 761 /* remove rows and cols from local problem */ 762 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 763 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 764 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 765 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 766 } else { 767 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 768 PetscScalar *vals; 769 PetscInt i,n,*idxs_ins; 770 771 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 772 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 773 if (!pcbddc->benign_B0) { 774 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 775 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 776 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 777 /* this matrix is very sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 778 Setting nnz=100 should be more than enough */ 779 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 780 } 781 782 for (i=0;i<pcbddc->benign_n;i++) { 783 PetscScalar *array; 784 PetscInt *idxs,j,nz,cum; 785 786 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 787 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 788 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 789 for (j=0;j<nz;j++) vals[j] = 1.; 790 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 791 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 792 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 793 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 794 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 795 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 796 cum = 0; 797 for (j=0;j<n;j++) { 798 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 799 vals[cum] = array[j]; 800 idxs_ins[cum] = j; 801 cum++; 802 } 803 } 804 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 805 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 806 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 807 } 808 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 811 } 812 } else { /* push */ 813 if (pcbddc->benign_change_explicit) { 814 PetscInt i; 815 816 for (i=0;i<pcbddc->benign_n;i++) { 817 PetscScalar *B0_vals; 818 PetscInt *B0_cols,B0_ncol; 819 820 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 821 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 822 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 824 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 825 } 826 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 } else { 829 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 830 } 831 } 832 PetscFunctionReturn(0); 833 } 834 835 #undef __FUNCT__ 836 #define __FUNCT__ "PCBDDCAdaptiveSelection" 837 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 838 { 839 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 840 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 841 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 842 PetscBLASInt *B_iwork,*B_ifail; 843 PetscScalar *work,lwork; 844 PetscScalar *St,*S,*eigv; 845 PetscScalar *Sarray,*Starray; 846 PetscReal *eigs,thresh; 847 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 848 PetscBool allocated_S_St; 849 #if defined(PETSC_USE_COMPLEX) 850 PetscReal *rwork; 851 #endif 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 if (!sub_schurs->schur_explicit) { 856 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 857 } 858 859 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 860 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 861 } 862 863 if (pcbddc->dbg_flag) { 864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 867 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 868 } 869 870 if (pcbddc->dbg_flag) { 871 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 872 } 873 874 /* max size of subsets */ 875 mss = 0; 876 for (i=0;i<sub_schurs->n_subs;i++) { 877 PetscInt subset_size; 878 879 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 880 mss = PetscMax(mss,subset_size); 881 } 882 883 /* min/max and threshold */ 884 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 885 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 886 nmax = PetscMax(nmin,nmax); 887 allocated_S_St = PETSC_FALSE; 888 if (nmin) { 889 allocated_S_St = PETSC_TRUE; 890 } 891 892 /* allocate lapack workspace */ 893 cum = cum2 = 0; 894 maxneigs = 0; 895 for (i=0;i<sub_schurs->n_subs;i++) { 896 PetscInt n,subset_size; 897 898 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 899 n = PetscMin(subset_size,nmax); 900 cum += subset_size; 901 cum2 += subset_size*n; 902 maxneigs = PetscMax(maxneigs,n); 903 } 904 if (mss) { 905 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 906 PetscBLASInt B_itype = 1; 907 PetscBLASInt B_N = mss; 908 PetscReal zero = 0.0; 909 PetscReal eps = 0.0; /* dlamch? */ 910 911 B_lwork = -1; 912 S = NULL; 913 St = NULL; 914 eigs = NULL; 915 eigv = NULL; 916 B_iwork = NULL; 917 B_ifail = NULL; 918 #if defined(PETSC_USE_COMPLEX) 919 rwork = NULL; 920 #endif 921 thresh = 1.0; 922 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 923 #if defined(PETSC_USE_COMPLEX) 924 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 925 #else 926 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 927 #endif 928 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 930 } else { 931 /* TODO */ 932 } 933 } else { 934 lwork = 0; 935 } 936 937 nv = 0; 938 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 939 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 940 } 941 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 942 if (allocated_S_St) { 943 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 944 } 945 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 946 #if defined(PETSC_USE_COMPLEX) 947 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 948 #endif 949 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 950 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 951 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 952 nv+cum,&pcbddc->adaptive_constraints_idxs, 953 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 954 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 955 956 maxneigs = 0; 957 cum = cum2 = cumarray = 0; 958 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 959 pcbddc->adaptive_constraints_data_ptr[0] = 0; 960 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 961 const PetscInt *idxs; 962 963 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 964 for (cum=0;cum<nv;cum++) { 965 pcbddc->adaptive_constraints_n[cum] = 1; 966 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 967 pcbddc->adaptive_constraints_data[cum] = 1.0; 968 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 969 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 970 } 971 cum2 = cum; 972 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 973 } 974 975 if (mss) { /* multilevel */ 976 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 977 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 978 } 979 980 thresh = pcbddc->adaptive_threshold; 981 for (i=0;i<sub_schurs->n_subs;i++) { 982 const PetscInt *idxs; 983 PetscReal infty = PETSC_MAX_REAL; 984 PetscInt j,subset_size,eigs_start = 0; 985 PetscBLASInt B_N; 986 PetscBool same_data = PETSC_FALSE; 987 988 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 989 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 990 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 991 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 992 if (sub_schurs->is_hermitian) { 993 PetscInt j,k; 994 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 995 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 996 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 997 } 998 for (j=0;j<subset_size;j++) { 999 for (k=j;k<subset_size;k++) { 1000 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1001 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1002 } 1003 } 1004 } else { 1005 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1006 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1007 } 1008 } else { 1009 S = Sarray + cumarray; 1010 St = Starray + cumarray; 1011 } 1012 1013 /* see if we can save some work */ 1014 if (sub_schurs->n_subs == 1) { 1015 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1016 } 1017 1018 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1019 B_neigs = 0; 1020 } else { 1021 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1022 PetscBLASInt B_itype = 1; 1023 PetscBLASInt B_IL, B_IU; 1024 PetscReal eps = -1.0; /* dlamch? */ 1025 PetscInt nmin_s; 1026 1027 if (pcbddc->dbg_flag) { 1028 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1029 } 1030 1031 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1032 if (thresh > 1.+PETSC_SMALL) { 1033 1034 /* ask for eigenvalues larger than thresh */ 1035 #if defined(PETSC_USE_COMPLEX) 1036 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1037 #else 1038 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1039 #endif 1040 } else { 1041 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1042 B_IL = 1; 1043 #if defined(PETSC_USE_COMPLEX) 1044 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1045 #else 1046 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1047 #endif 1048 } 1049 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1050 if (B_ierr) { 1051 if (B_ierr < 0 ) { 1052 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1053 } else if (B_ierr <= B_N) { 1054 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1055 } else { 1056 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1057 } 1058 } 1059 1060 if (B_neigs > nmax) { 1061 if (pcbddc->dbg_flag) { 1062 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1063 } 1064 eigs_start = B_neigs -nmax; 1065 B_neigs = nmax; 1066 } 1067 1068 nmin_s = PetscMin(nmin,B_N); 1069 if (B_neigs < nmin_s) { 1070 PetscBLASInt B_neigs2; 1071 1072 B_IU = B_N - B_neigs; 1073 B_IL = B_N - nmin_s + 1; 1074 if (pcbddc->dbg_flag) { 1075 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1076 } 1077 if (sub_schurs->is_hermitian) { 1078 PetscInt j; 1079 for (j=0;j<subset_size;j++) { 1080 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1081 } 1082 for (j=0;j<subset_size;j++) { 1083 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1084 } 1085 } else { 1086 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1087 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1088 } 1089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1090 #if defined(PETSC_USE_COMPLEX) 1091 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1092 #else 1093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1094 #endif 1095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1096 B_neigs += B_neigs2; 1097 } 1098 if (B_ierr) { 1099 if (B_ierr < 0 ) { 1100 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1101 } else if (B_ierr <= B_N) { 1102 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1103 } else { 1104 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1105 } 1106 } 1107 if (pcbddc->dbg_flag) { 1108 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1109 for (j=0;j<B_neigs;j++) { 1110 if (eigs[j] == 0.0) { 1111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1112 } else { 1113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1114 } 1115 } 1116 } 1117 } else { 1118 /* TODO */ 1119 } 1120 } 1121 maxneigs = PetscMax(B_neigs,maxneigs); 1122 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1123 if (B_neigs) { 1124 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1125 1126 if (pcbddc->dbg_flag > 1) { 1127 PetscInt ii; 1128 for (ii=0;ii<B_neigs;ii++) { 1129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1130 for (j=0;j<B_N;j++) { 1131 #if defined(PETSC_USE_COMPLEX) 1132 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1133 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1134 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1135 #else 1136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1137 #endif 1138 } 1139 } 1140 } 1141 #if 0 1142 for (j=0;j<B_neigs;j++) { 1143 PetscBLASInt Blas_N,Blas_one = 1.0; 1144 PetscScalar norm; 1145 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1146 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1147 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1148 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1149 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1150 } else { 1151 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1152 } 1153 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1154 } 1155 #endif 1156 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1157 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1158 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1159 cum++; 1160 } 1161 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1162 /* shift for next computation */ 1163 cumarray += subset_size*subset_size; 1164 } 1165 if (pcbddc->dbg_flag) { 1166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1167 } 1168 1169 if (mss) { 1170 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1171 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1172 /* destroy matrices (junk) */ 1173 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1174 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1175 } 1176 if (allocated_S_St) { 1177 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1178 } 1179 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1180 #if defined(PETSC_USE_COMPLEX) 1181 ierr = PetscFree(rwork);CHKERRQ(ierr); 1182 #endif 1183 if (pcbddc->dbg_flag) { 1184 PetscInt maxneigs_r; 1185 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "PCBDDCSetUpSolvers" 1193 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1194 { 1195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1196 PetscScalar *coarse_submat_vals; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1201 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1202 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1203 1204 /* Setup local neumann solver ksp_R */ 1205 /* PCBDDCSetUpLocalScatters should be called first! */ 1206 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1207 1208 /* Change global null space passed in by the user if change of basis has been requested */ 1209 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1210 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1211 } 1212 1213 /* 1214 Setup local correction and local part of coarse basis. 1215 Gives back the dense local part of the coarse matrix in column major ordering 1216 */ 1217 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1218 1219 /* Compute total number of coarse nodes and setup coarse solver */ 1220 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1221 1222 /* free */ 1223 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "PCBDDCResetCustomization" 1229 PetscErrorCode PCBDDCResetCustomization(PC pc) 1230 { 1231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1236 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1237 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1238 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1239 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1240 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1241 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1242 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1243 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1244 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1245 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1246 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "PCBDDCResetTopography" 1252 PetscErrorCode PCBDDCResetTopography(PC pc) 1253 { 1254 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1255 PetscInt i; 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1260 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1261 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1262 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1263 for (i=0;i<pcbddc->n_local_subs;i++) { 1264 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1265 } 1266 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1267 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "PCBDDCResetSolvers" 1273 PetscErrorCode PCBDDCResetSolvers(PC pc) 1274 { 1275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1280 if (pcbddc->coarse_phi_B) { 1281 PetscScalar *array; 1282 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1283 ierr = PetscFree(array);CHKERRQ(ierr); 1284 } 1285 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1286 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1287 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1288 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1289 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1290 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1291 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1292 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1293 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1294 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1295 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1296 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1297 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1298 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1299 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1300 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1301 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1302 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1303 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1304 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1305 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1306 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1307 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1308 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1309 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1310 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1311 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1312 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1313 if (pcbddc->benign_zerodiag_subs) { 1314 PetscInt i; 1315 for (i=0;i<pcbddc->benign_n;i++) { 1316 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1317 } 1318 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1319 } 1320 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1326 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1327 { 1328 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1329 PC_IS *pcis = (PC_IS*)pc->data; 1330 VecType impVecType; 1331 PetscInt n_constraints,n_R,old_size; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 if (!pcbddc->ConstraintMatrix) { 1336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1337 } 1338 /* get sizes */ 1339 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1340 n_R = pcis->n - pcbddc->n_vertices; 1341 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1342 /* local work vectors (try to avoid unneeded work)*/ 1343 /* R nodes */ 1344 old_size = -1; 1345 if (pcbddc->vec1_R) { 1346 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1347 } 1348 if (n_R != old_size) { 1349 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1350 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1351 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1352 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1353 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1354 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1355 } 1356 /* local primal dofs */ 1357 old_size = -1; 1358 if (pcbddc->vec1_P) { 1359 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1360 } 1361 if (pcbddc->local_primal_size != old_size) { 1362 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1363 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1364 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1365 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1366 } 1367 /* local explicit constraints */ 1368 old_size = -1; 1369 if (pcbddc->vec1_C) { 1370 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1371 } 1372 if (n_constraints && n_constraints != old_size) { 1373 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1374 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1375 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1376 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "PCBDDCSetUpCorrection" 1383 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1384 { 1385 PetscErrorCode ierr; 1386 /* pointers to pcis and pcbddc */ 1387 PC_IS* pcis = (PC_IS*)pc->data; 1388 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1389 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1390 /* submatrices of local problem */ 1391 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1392 /* submatrices of benign trick */ 1393 Mat B0_V = NULL; 1394 /* submatrices of local coarse problem */ 1395 Mat S_VV,S_CV,S_VC,S_CC; 1396 /* working matrices */ 1397 Mat C_CR; 1398 /* additional working stuff */ 1399 PC pc_R; 1400 Mat F; 1401 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1402 1403 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1404 PetscScalar *work; 1405 PetscInt *idx_V_B; 1406 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1407 PetscInt i,n_R,n_D,n_B; 1408 1409 /* some shortcuts to scalars */ 1410 PetscScalar one=1.0,m_one=-1.0; 1411 1412 PetscFunctionBegin; 1413 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1414 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1415 } 1416 1417 /* Set Non-overlapping dimensions */ 1418 n_vertices = pcbddc->n_vertices; 1419 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1420 n_B = pcis->n_B; 1421 n_D = pcis->n - n_B; 1422 n_R = pcis->n - n_vertices; 1423 1424 /* vertices in boundary numbering */ 1425 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1426 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1427 if (i != n_vertices) { 1428 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1429 } 1430 1431 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1432 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1433 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1434 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1435 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1436 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1437 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1438 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1440 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1441 1442 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1443 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1444 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1445 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1446 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1447 lda_rhs = n_R; 1448 need_benign_correction = PETSC_FALSE; 1449 if (isLU || isILU || isCHOL) { 1450 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1451 } else if (sub_schurs->reuse_solver) { 1452 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1453 MatFactorType type; 1454 1455 F = reuse_solver->F; 1456 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1457 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1458 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1459 need_benign_correction = !!reuse_solver->benign_n; 1460 } else { 1461 F = NULL; 1462 } 1463 1464 /* allocate workspace */ 1465 n = 0; 1466 if (n_constraints) { 1467 n += lda_rhs*n_constraints; 1468 } 1469 if (n_vertices) { 1470 n = PetscMax(2*lda_rhs*n_vertices,n); 1471 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1472 } 1473 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1474 1475 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1476 if (n_constraints) { 1477 Mat M1,M2,M3; 1478 Mat auxmat; 1479 IS is_aux; 1480 PetscScalar *array,*array2; 1481 1482 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1483 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1484 1485 /* Extract constraints on R nodes: C_{CR} */ 1486 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1487 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1488 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1489 1490 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1491 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1492 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1493 for (i=0;i<n_constraints;i++) { 1494 const PetscScalar *row_cmat_values; 1495 const PetscInt *row_cmat_indices; 1496 PetscInt size_of_constraint,j; 1497 1498 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1499 for (j=0;j<size_of_constraint;j++) { 1500 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1501 } 1502 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1503 } 1504 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1505 if (F) { 1506 Mat B; 1507 1508 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1509 if (need_benign_correction) { 1510 PetscScalar *marr; 1511 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1512 1513 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1514 for (i=0;i<n_constraints;i++) { 1515 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1516 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1517 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1518 } 1519 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1520 } 1521 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1522 if (need_benign_correction) { 1523 PetscScalar *marr; 1524 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1525 1526 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1527 for (i=0;i<n_constraints;i++) { 1528 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1529 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1530 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1531 } 1532 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1533 } 1534 ierr = MatDestroy(&B);CHKERRQ(ierr); 1535 } else { 1536 PetscScalar *marr; 1537 1538 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1539 for (i=0;i<n_constraints;i++) { 1540 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1541 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1542 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1543 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1544 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1545 } 1546 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1547 } 1548 if (!pcbddc->switch_static) { 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1550 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1551 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1552 for (i=0;i<n_constraints;i++) { 1553 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1554 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1555 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1558 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1559 } 1560 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1561 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1562 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1563 } else { 1564 if (lda_rhs != n_R) { 1565 IS dummy; 1566 1567 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1568 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1569 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1570 } else { 1571 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1572 pcbddc->local_auxmat2 = local_auxmat2_R; 1573 } 1574 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1575 } 1576 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1577 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1578 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1579 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1580 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1581 if (isCHOL) { 1582 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1583 } else { 1584 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1585 } 1586 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1587 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1588 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1589 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1590 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1591 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1592 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1593 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1594 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1595 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1596 } 1597 /* Get submatrices from subdomain matrix */ 1598 if (pcbddc->benign_n) { 1599 IS dummy; 1600 Mat B0_R; 1601 PetscReal norm; 1602 1603 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1605 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1606 if (norm > PETSC_SMALL) { 1607 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1608 } 1609 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1610 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1611 } 1612 1613 if (n_vertices) { 1614 IS is_aux; 1615 1616 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1617 IS tis; 1618 1619 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1620 ierr = ISSort(tis);CHKERRQ(ierr); 1621 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1622 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1623 } else { 1624 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1625 } 1626 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1627 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1628 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1629 if (pcbddc->benign_n) { 1630 IS dummy; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1634 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1635 } 1636 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1637 } 1638 1639 /* Matrix of coarse basis functions (local) */ 1640 if (pcbddc->coarse_phi_B) { 1641 PetscInt on_B,on_primal,on_D=n_D; 1642 if (pcbddc->coarse_phi_D) { 1643 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1644 } 1645 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1646 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1647 PetscScalar *marray; 1648 1649 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1650 ierr = PetscFree(marray);CHKERRQ(ierr); 1651 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1652 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1653 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1654 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1655 } 1656 } 1657 1658 if (!pcbddc->coarse_phi_B) { 1659 PetscScalar *marray; 1660 1661 n = n_B*pcbddc->local_primal_size; 1662 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1663 n += n_D*pcbddc->local_primal_size; 1664 } 1665 if (!pcbddc->symmetric_primal) { 1666 n *= 2; 1667 } 1668 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1670 n = n_B*pcbddc->local_primal_size; 1671 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1673 n += n_D*pcbddc->local_primal_size; 1674 } 1675 if (!pcbddc->symmetric_primal) { 1676 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1677 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1678 n = n_B*pcbddc->local_primal_size; 1679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1680 } 1681 } else { 1682 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1683 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1684 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1685 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1686 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1687 } 1688 } 1689 } 1690 1691 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1692 p0_lidx_I = NULL; 1693 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1694 const PetscInt *idxs; 1695 1696 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1697 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1698 for (i=0;i<pcbddc->benign_n;i++) { 1699 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1700 } 1701 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1702 } 1703 1704 /* vertices */ 1705 if (n_vertices) { 1706 1707 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1708 1709 if (n_R) { 1710 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1711 PetscBLASInt B_N,B_one = 1; 1712 PetscScalar *x,*y; 1713 PetscBool isseqaij; 1714 1715 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1716 if (need_benign_correction) { 1717 ISLocalToGlobalMapping RtoN; 1718 IS is_p0; 1719 PetscInt *idxs_p0,n; 1720 1721 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1722 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1723 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1724 if (n != pcbddc->benign_n) { 1725 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1726 } 1727 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1728 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1729 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1730 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1731 } 1732 1733 if (lda_rhs == n_R) { 1734 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1735 } else { 1736 PetscScalar *av,*array; 1737 const PetscInt *xadj,*adjncy; 1738 PetscInt n; 1739 PetscBool flg_row; 1740 1741 array = work+lda_rhs*n_vertices; 1742 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1743 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1744 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1745 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1746 for (i=0;i<n;i++) { 1747 PetscInt j; 1748 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1749 } 1750 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1753 } 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1755 if (F) { 1756 if (need_benign_correction) { 1757 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1758 PetscScalar *marr; 1759 1760 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1761 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1762 1763 | 0 0 0 | (V) 1764 L = | 0 0 -1 | (P-p0) 1765 | 0 0 -1 | (p0) 1766 1767 */ 1768 for (i=0;i<reuse_solver->benign_n;i++) { 1769 const PetscScalar *vals; 1770 const PetscInt *idxs,*idxs_zero; 1771 PetscInt n,j,nz; 1772 1773 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1774 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1775 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1776 for (j=0;j<n;j++) { 1777 PetscScalar val = vals[j]; 1778 PetscInt k,col = idxs[j]; 1779 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1780 } 1781 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1783 } 1784 /* need to correct the rhs */ 1785 for (i=0;i<n_vertices;i++) { 1786 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1787 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1789 } 1790 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1791 } 1792 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1793 /* need to correct the solution */ 1794 if (need_benign_correction) { 1795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1796 PetscScalar *marr; 1797 1798 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1799 for (i=0;i<n_vertices;i++) { 1800 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1801 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1802 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1803 } 1804 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1805 } 1806 } else { 1807 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1808 for (i=0;i<n_vertices;i++) { 1809 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1810 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1811 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1812 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1813 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1814 } 1815 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1816 } 1817 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1818 /* S_VV and S_CV */ 1819 if (n_constraints) { 1820 Mat B; 1821 1822 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1823 for (i=0;i<n_vertices;i++) { 1824 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1825 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1826 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1829 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1830 } 1831 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1832 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1833 ierr = MatDestroy(&B);CHKERRQ(ierr); 1834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1835 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1836 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1837 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1838 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1839 ierr = MatDestroy(&B);CHKERRQ(ierr); 1840 } 1841 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1842 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1843 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1844 } 1845 if (lda_rhs != n_R) { 1846 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1848 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1849 } 1850 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1851 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1852 if (need_benign_correction) { 1853 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1854 PetscScalar *marr,*sums; 1855 1856 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1857 ierr = MatDenseGetArray(S_VVt,&marr); 1858 for (i=0;i<reuse_solver->benign_n;i++) { 1859 const PetscScalar *vals; 1860 const PetscInt *idxs,*idxs_zero; 1861 PetscInt n,j,nz; 1862 1863 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1864 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1865 for (j=0;j<n_vertices;j++) { 1866 PetscInt k; 1867 sums[j] = 0.; 1868 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1869 } 1870 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1871 for (j=0;j<n;j++) { 1872 PetscScalar val = vals[j]; 1873 PetscInt k; 1874 for (k=0;k<n_vertices;k++) { 1875 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1876 } 1877 } 1878 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1879 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1880 } 1881 ierr = PetscFree(sums);CHKERRQ(ierr); 1882 ierr = MatDenseRestoreArray(S_VVt,&marr); 1883 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1884 } 1885 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1886 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1887 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1888 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1889 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1890 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1891 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1892 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1893 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1894 } else { 1895 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1896 } 1897 for (i=0;i<pcbddc->benign_n;i++) { 1898 const PetscScalar *vals; 1899 const PetscInt *idxs; 1900 PetscInt n,j,primal_idx; 1901 1902 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1903 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1904 for (j=0;j<n;j++) { 1905 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1906 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1907 } 1908 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1909 } 1910 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1911 1912 /* coarse basis functions */ 1913 for (i=0;i<n_vertices;i++) { 1914 PetscScalar *y; 1915 1916 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1918 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1919 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1920 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1921 y[n_B*i+idx_V_B[i]] = 1.0; 1922 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1923 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1924 1925 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1926 PetscInt j; 1927 1928 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1929 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1930 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1931 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1932 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1933 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1934 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1935 } 1936 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1937 } 1938 /* if n_R == 0 the object is not destroyed */ 1939 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1940 } 1941 1942 if (n_constraints) { 1943 Mat B; 1944 1945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1946 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1947 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1948 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1949 if (n_vertices) { 1950 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1951 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1952 } else { 1953 Mat S_VCt; 1954 1955 if (lda_rhs != n_R) { 1956 ierr = MatDestroy(&B);CHKERRQ(ierr); 1957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1958 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1959 } 1960 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1961 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1962 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1963 } 1964 } 1965 ierr = MatDestroy(&B);CHKERRQ(ierr); 1966 /* coarse basis functions */ 1967 for (i=0;i<n_constraints;i++) { 1968 PetscScalar *y; 1969 1970 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1971 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1972 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1973 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1978 PetscInt j; 1979 1980 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1981 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1982 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1983 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1984 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1985 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1986 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1987 } 1988 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1989 } 1990 } 1991 if (n_constraints) { 1992 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1993 } 1994 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1995 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1996 1997 /* compute other basis functions for non-symmetric problems */ 1998 if (!pcbddc->symmetric_primal) { 1999 Mat B_V=NULL,B_C=NULL; 2000 PetscScalar *marray; 2001 2002 if (n_constraints) { 2003 Mat S_CCT,C_CRT; 2004 2005 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2006 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2007 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2008 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2009 if (n_vertices) { 2010 Mat S_VCT; 2011 2012 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2013 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2014 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2015 } 2016 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2017 } 2018 if (n_vertices && n_R) { 2019 PetscScalar *av,*marray; 2020 const PetscInt *xadj,*adjncy; 2021 PetscInt n; 2022 PetscBool flg_row; 2023 2024 /* B_V = B_V - A_VR^T */ 2025 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2026 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2027 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2028 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2029 for (i=0;i<n;i++) { 2030 PetscInt j; 2031 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2032 } 2033 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2034 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2035 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2036 } 2037 2038 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2039 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2040 for (i=0;i<n_vertices;i++) { 2041 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2042 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2043 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2044 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2045 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2046 } 2047 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2048 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2049 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2050 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2051 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2052 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2053 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2054 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2055 } 2056 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2057 /* coarse basis functions */ 2058 for (i=0;i<pcbddc->local_primal_size;i++) { 2059 PetscScalar *y; 2060 2061 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2062 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2063 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2064 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 if (i<n_vertices) { 2067 y[n_B*i+idx_V_B[i]] = 1.0; 2068 } 2069 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2070 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2071 2072 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2073 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2074 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2075 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2076 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2077 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2078 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2079 } 2080 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2081 } 2082 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2083 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2084 } 2085 /* free memory */ 2086 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2087 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2088 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2089 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2090 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2091 ierr = PetscFree(work);CHKERRQ(ierr); 2092 if (n_vertices) { 2093 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2094 } 2095 if (n_constraints) { 2096 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2097 } 2098 /* Checking coarse_sub_mat and coarse basis functios */ 2099 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2100 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2101 if (pcbddc->dbg_flag) { 2102 Mat coarse_sub_mat; 2103 Mat AUXMAT,TM1,TM2,TM3,TM4; 2104 Mat coarse_phi_D,coarse_phi_B; 2105 Mat coarse_psi_D,coarse_psi_B; 2106 Mat A_II,A_BB,A_IB,A_BI; 2107 Mat C_B,CPHI; 2108 IS is_dummy; 2109 Vec mones; 2110 MatType checkmattype=MATSEQAIJ; 2111 PetscReal real_value; 2112 2113 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2114 Mat A; 2115 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2116 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2117 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2118 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2119 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2120 ierr = MatDestroy(&A);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2123 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2124 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2125 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2126 } 2127 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2128 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2129 if (!pcbddc->symmetric_primal) { 2130 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2131 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2132 } 2133 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2134 2135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2136 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2137 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2138 if (!pcbddc->symmetric_primal) { 2139 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2140 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2141 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2142 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2143 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2144 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2145 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2146 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2147 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2148 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2149 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2150 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2151 } else { 2152 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2153 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2154 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2155 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2156 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2157 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2158 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2159 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2160 } 2161 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2162 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2163 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2164 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2165 if (pcbddc->benign_n) { 2166 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2167 PetscScalar *data,*data2; 2168 PetscInt j; 2169 2170 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2171 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2172 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2173 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2174 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2175 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2176 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2177 for (j=0;j<pcbddc->benign_n;j++) { 2178 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2179 for (i=0;i<pcbddc->local_primal_size;i++) { 2180 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2181 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2182 } 2183 } 2184 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2185 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2186 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2187 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2188 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2189 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2190 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2191 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2192 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2193 } 2194 #if 0 2195 { 2196 PetscViewer viewer; 2197 char filename[256]; 2198 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2199 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2200 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2201 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2202 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2203 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2204 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2205 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2206 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2207 if (pcbddc->coarse_phi_D) { 2208 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2209 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2210 } 2211 if (pcbddc->coarse_psi_B) { 2212 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2213 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2214 } 2215 if (pcbddc->coarse_phi_B) { 2216 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2217 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2218 } 2219 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2220 } 2221 #endif 2222 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2223 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2224 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2226 2227 /* check constraints */ 2228 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2229 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2230 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2231 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2232 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2233 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2234 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2235 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2236 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2237 if (!pcbddc->symmetric_primal) { 2238 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2239 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2240 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2241 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2243 } 2244 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2245 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2246 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2247 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2248 } 2249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2250 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2251 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2252 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2253 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2254 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2255 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2256 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2257 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2258 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2259 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2260 if (!pcbddc->symmetric_primal) { 2261 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2262 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2263 } 2264 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2265 } 2266 /* get back data */ 2267 *coarse_submat_vals_n = coarse_submat_vals; 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2273 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2274 { 2275 Mat *work_mat; 2276 IS isrow_s,iscol_s; 2277 PetscBool rsorted,csorted; 2278 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2283 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2284 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2285 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2286 2287 if (!rsorted) { 2288 const PetscInt *idxs; 2289 PetscInt *idxs_sorted,i; 2290 2291 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2292 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2293 for (i=0;i<rsize;i++) { 2294 idxs_perm_r[i] = i; 2295 } 2296 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2297 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2298 for (i=0;i<rsize;i++) { 2299 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2300 } 2301 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2302 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2303 } else { 2304 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2305 isrow_s = isrow; 2306 } 2307 2308 if (!csorted) { 2309 if (isrow == iscol) { 2310 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2311 iscol_s = isrow_s; 2312 } else { 2313 const PetscInt *idxs; 2314 PetscInt *idxs_sorted,i; 2315 2316 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2317 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2318 for (i=0;i<csize;i++) { 2319 idxs_perm_c[i] = i; 2320 } 2321 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2322 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2323 for (i=0;i<csize;i++) { 2324 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2325 } 2326 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2327 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2328 } 2329 } else { 2330 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2331 iscol_s = iscol; 2332 } 2333 2334 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2335 2336 if (!rsorted || !csorted) { 2337 Mat new_mat; 2338 IS is_perm_r,is_perm_c; 2339 2340 if (!rsorted) { 2341 PetscInt *idxs_r,i; 2342 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2343 for (i=0;i<rsize;i++) { 2344 idxs_r[idxs_perm_r[i]] = i; 2345 } 2346 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2347 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2348 } else { 2349 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2350 } 2351 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2352 2353 if (!csorted) { 2354 if (isrow_s == iscol_s) { 2355 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2356 is_perm_c = is_perm_r; 2357 } else { 2358 PetscInt *idxs_c,i; 2359 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2360 for (i=0;i<csize;i++) { 2361 idxs_c[idxs_perm_c[i]] = i; 2362 } 2363 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2364 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2365 } 2366 } else { 2367 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2368 } 2369 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2370 2371 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2372 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2373 work_mat[0] = new_mat; 2374 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2375 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2376 } 2377 2378 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2379 *B = work_mat[0]; 2380 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2381 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2382 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2388 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2389 { 2390 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2391 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2392 Mat new_mat; 2393 IS is_local,is_global; 2394 PetscInt local_size; 2395 PetscBool isseqaij; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2400 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2401 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2402 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2403 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2404 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2405 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2406 2407 /* check */ 2408 if (pcbddc->dbg_flag) { 2409 Vec x,x_change; 2410 PetscReal error; 2411 2412 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2413 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2414 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2415 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2416 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2417 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2418 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2419 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2420 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2421 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2423 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2424 ierr = VecDestroy(&x);CHKERRQ(ierr); 2425 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2426 } 2427 2428 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2429 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2430 if (isseqaij) { 2431 Mat M; 2432 2433 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2434 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2435 ierr = MatDestroy(&M);CHKERRQ(ierr); 2436 } else { 2437 Mat work_mat,M; 2438 2439 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2440 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2441 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2442 ierr = MatDestroy(&M);CHKERRQ(ierr); 2443 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2444 } 2445 if (matis->A->symmetric_set) { 2446 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2447 #if !defined(PETSC_USE_COMPLEX) 2448 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2449 #endif 2450 } 2451 /* 2452 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2453 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2454 */ 2455 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2461 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2462 { 2463 PC_IS* pcis = (PC_IS*)(pc->data); 2464 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2465 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2466 PetscInt *idx_R_local=NULL; 2467 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2468 PetscInt vbs,bs; 2469 PetscBT bitmask=NULL; 2470 PetscErrorCode ierr; 2471 2472 PetscFunctionBegin; 2473 /* 2474 No need to setup local scatters if 2475 - primal space is unchanged 2476 AND 2477 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2478 AND 2479 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2480 */ 2481 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2482 PetscFunctionReturn(0); 2483 } 2484 /* destroy old objects */ 2485 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2486 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2487 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2488 /* Set Non-overlapping dimensions */ 2489 n_B = pcis->n_B; 2490 n_D = pcis->n - n_B; 2491 n_vertices = pcbddc->n_vertices; 2492 2493 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2494 2495 /* create auxiliary bitmask and allocate workspace */ 2496 if (!sub_schurs->reuse_solver) { 2497 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2498 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2499 for (i=0;i<n_vertices;i++) { 2500 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2501 } 2502 2503 for (i=0, n_R=0; i<pcis->n; i++) { 2504 if (!PetscBTLookup(bitmask,i)) { 2505 idx_R_local[n_R++] = i; 2506 } 2507 } 2508 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2509 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2510 2511 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2512 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2513 } 2514 2515 /* Block code */ 2516 vbs = 1; 2517 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2518 if (bs>1 && !(n_vertices%bs)) { 2519 PetscBool is_blocked = PETSC_TRUE; 2520 PetscInt *vary; 2521 if (!sub_schurs->reuse_solver) { 2522 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2523 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2524 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2525 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2526 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2527 for (i=0; i<pcis->n/bs; i++) { 2528 if (vary[i]!=0 && vary[i]!=bs) { 2529 is_blocked = PETSC_FALSE; 2530 break; 2531 } 2532 } 2533 ierr = PetscFree(vary);CHKERRQ(ierr); 2534 } else { 2535 /* Verify directly the R set */ 2536 for (i=0; i<n_R/bs; i++) { 2537 PetscInt j,node=idx_R_local[bs*i]; 2538 for (j=1; j<bs; j++) { 2539 if (node != idx_R_local[bs*i+j]-j) { 2540 is_blocked = PETSC_FALSE; 2541 break; 2542 } 2543 } 2544 } 2545 } 2546 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2547 vbs = bs; 2548 for (i=0;i<n_R/vbs;i++) { 2549 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2550 } 2551 } 2552 } 2553 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2554 if (sub_schurs->reuse_solver) { 2555 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2556 2557 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2558 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2559 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2560 reuse_solver->is_R = pcbddc->is_R_local; 2561 } else { 2562 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2563 } 2564 2565 /* print some info if requested */ 2566 if (pcbddc->dbg_flag) { 2567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2568 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2569 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2573 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2574 } 2575 2576 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2577 if (!sub_schurs->reuse_solver) { 2578 IS is_aux1,is_aux2; 2579 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2580 2581 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2582 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2583 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2584 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2585 for (i=0; i<n_D; i++) { 2586 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2587 } 2588 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2589 for (i=0, j=0; i<n_R; i++) { 2590 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2591 aux_array1[j++] = i; 2592 } 2593 } 2594 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2595 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2596 for (i=0, j=0; i<n_B; i++) { 2597 if (!PetscBTLookup(bitmask,is_indices[i])) { 2598 aux_array2[j++] = i; 2599 } 2600 } 2601 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2602 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2603 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2604 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2605 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2606 2607 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2608 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2609 for (i=0, j=0; i<n_R; i++) { 2610 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2611 aux_array1[j++] = i; 2612 } 2613 } 2614 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2615 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2616 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2617 } 2618 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2619 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2620 } else { 2621 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2622 IS tis; 2623 PetscInt schur_size; 2624 2625 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2626 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2627 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2628 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2629 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2630 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2631 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2632 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2633 } 2634 } 2635 PetscFunctionReturn(0); 2636 } 2637 2638 2639 #undef __FUNCT__ 2640 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2641 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2642 { 2643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2644 PC_IS *pcis = (PC_IS*)pc->data; 2645 PC pc_temp; 2646 Mat A_RR; 2647 MatReuse reuse; 2648 PetscScalar m_one = -1.0; 2649 PetscReal value; 2650 PetscInt n_D,n_R; 2651 PetscBool use_exact,use_exact_reduced,issbaij; 2652 PetscErrorCode ierr; 2653 /* prefixes stuff */ 2654 char dir_prefix[256],neu_prefix[256],str_level[16]; 2655 size_t len; 2656 2657 PetscFunctionBegin; 2658 2659 /* compute prefixes */ 2660 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2661 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2662 if (!pcbddc->current_level) { 2663 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2664 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2665 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2666 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2667 } else { 2668 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2669 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2670 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2671 len -= 15; /* remove "pc_bddc_coarse_" */ 2672 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2673 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2674 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2675 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2676 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2677 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2678 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2679 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2680 } 2681 2682 /* DIRICHLET PROBLEM */ 2683 if (dirichlet) { 2684 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2685 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2686 Mat A_IIn; 2687 2688 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2689 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2690 pcis->A_II = A_IIn; 2691 } 2692 if (pcbddc->local_mat->symmetric_set) { 2693 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2694 } 2695 /* Matrix for Dirichlet problem is pcis->A_II */ 2696 n_D = pcis->n - pcis->n_B; 2697 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2698 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2699 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2700 /* default */ 2701 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2702 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2703 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2704 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2705 if (issbaij) { 2706 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2707 } else { 2708 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2709 } 2710 /* Allow user's customization */ 2711 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2712 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2713 } 2714 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2715 if (sub_schurs->reuse_solver) { 2716 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2717 2718 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2719 } 2720 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2721 if (!n_D) { 2722 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2723 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2724 } 2725 /* Set Up KSP for Dirichlet problem of BDDC */ 2726 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2727 /* set ksp_D into pcis data */ 2728 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2729 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2730 pcis->ksp_D = pcbddc->ksp_D; 2731 } 2732 2733 /* NEUMANN PROBLEM */ 2734 A_RR = 0; 2735 if (neumann) { 2736 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2737 PetscInt ibs,mbs; 2738 PetscBool issbaij; 2739 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2740 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2741 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2742 if (pcbddc->ksp_R) { /* already created ksp */ 2743 PetscInt nn_R; 2744 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2745 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2746 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2747 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2748 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2749 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2750 reuse = MAT_INITIAL_MATRIX; 2751 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2752 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2753 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2754 reuse = MAT_INITIAL_MATRIX; 2755 } else { /* safe to reuse the matrix */ 2756 reuse = MAT_REUSE_MATRIX; 2757 } 2758 } 2759 /* last check */ 2760 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2761 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2762 reuse = MAT_INITIAL_MATRIX; 2763 } 2764 } else { /* first time, so we need to create the matrix */ 2765 reuse = MAT_INITIAL_MATRIX; 2766 } 2767 /* extract A_RR */ 2768 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2769 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2770 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2771 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2772 if (matis->A == pcbddc->local_mat) { 2773 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2774 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2775 } else { 2776 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2777 } 2778 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2779 if (matis->A == pcbddc->local_mat) { 2780 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2781 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2782 } else { 2783 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2784 } 2785 } 2786 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2787 if (pcbddc->local_mat->symmetric_set) { 2788 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2789 } 2790 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2791 Mat A_RRn; 2792 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2793 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2794 A_RR = A_RRn; 2795 } 2796 if (!pcbddc->ksp_R) { /* create object if not present */ 2797 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2798 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2799 /* default */ 2800 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2801 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2802 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2803 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2804 if (issbaij) { 2805 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2806 } else { 2807 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2808 } 2809 /* Allow user's customization */ 2810 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2811 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2812 } 2813 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2814 if (!n_R) { 2815 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2816 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2817 } 2818 /* Reuse solver if it is present */ 2819 if (sub_schurs->reuse_solver) { 2820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2821 2822 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2823 } 2824 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2825 /* Set Up KSP for Neumann problem of BDDC */ 2826 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2827 } 2828 /* free Neumann problem's matrix */ 2829 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2830 2831 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2832 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2833 if (pcbddc->dbg_flag) { 2834 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2835 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2836 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2837 } 2838 if (dirichlet) { /* Dirichlet */ 2839 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2840 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2841 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2842 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2843 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2844 /* need to be adapted? */ 2845 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2846 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2847 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2848 /* print info */ 2849 if (pcbddc->dbg_flag) { 2850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2851 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2852 } 2853 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2854 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2855 } 2856 } 2857 if (neumann) { /* Neumann */ 2858 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2859 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2860 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2861 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2862 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2863 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2864 /* need to be adapted? */ 2865 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2866 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2867 /* print info */ 2868 if (pcbddc->dbg_flag) { 2869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2871 } 2872 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2873 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2874 } 2875 } 2876 } 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2882 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2883 { 2884 PetscErrorCode ierr; 2885 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2886 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2887 2888 PetscFunctionBegin; 2889 if (!sub_schurs->reuse_solver) { 2890 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2891 } 2892 if (!pcbddc->switch_static) { 2893 if (applytranspose && pcbddc->local_auxmat1) { 2894 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2895 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2896 } 2897 if (!sub_schurs->reuse_solver) { 2898 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2899 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2900 } else { 2901 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2902 2903 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2904 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2905 } 2906 } else { 2907 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2908 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2909 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2910 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2911 if (applytranspose && pcbddc->local_auxmat1) { 2912 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2913 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2914 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2915 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2916 } 2917 } 2918 if (!sub_schurs->reuse_solver) { 2919 if (applytranspose) { 2920 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2921 } else { 2922 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2923 } 2924 } else { 2925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2926 2927 if (applytranspose) { 2928 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2929 } else { 2930 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2931 } 2932 } 2933 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2934 if (!pcbddc->switch_static) { 2935 if (!sub_schurs->reuse_solver) { 2936 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2937 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2938 } else { 2939 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2940 2941 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2942 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2943 } 2944 if (!applytranspose && pcbddc->local_auxmat1) { 2945 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2946 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2947 } 2948 } else { 2949 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2951 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2952 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2953 if (!applytranspose && pcbddc->local_auxmat1) { 2954 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2955 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2956 } 2957 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2958 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2961 } 2962 PetscFunctionReturn(0); 2963 } 2964 2965 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2968 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2969 { 2970 PetscErrorCode ierr; 2971 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2972 PC_IS* pcis = (PC_IS*) (pc->data); 2973 const PetscScalar zero = 0.0; 2974 2975 PetscFunctionBegin; 2976 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2977 if (applytranspose) { 2978 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2979 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2980 } else { 2981 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2982 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2983 } 2984 2985 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2986 if (pcbddc->benign_n) { 2987 PetscScalar *array; 2988 PetscInt j; 2989 2990 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2991 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2992 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2993 } 2994 2995 /* start communications from local primal nodes to rhs of coarse solver */ 2996 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2997 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2998 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2999 3000 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3001 /* TODO remove null space when doing multilevel */ 3002 if (pcbddc->coarse_ksp) { 3003 Mat coarse_mat; 3004 Vec rhs,sol; 3005 MatNullSpace nullsp; 3006 3007 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3008 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3009 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3010 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3011 if (nullsp) { 3012 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3013 } 3014 if (applytranspose) { 3015 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3016 } else { 3017 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3018 } 3019 if (nullsp) { 3020 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3021 } 3022 } 3023 3024 /* Local solution on R nodes */ 3025 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 3026 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3027 } 3028 3029 /* communications from coarse sol to local primal nodes */ 3030 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3031 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 3033 /* Sum contributions from two levels */ 3034 if (applytranspose) { 3035 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3036 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3037 } else { 3038 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3039 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3040 } 3041 /* store p0 */ 3042 if (pcbddc->benign_n) { 3043 PetscScalar *array; 3044 PetscInt j; 3045 3046 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3047 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3048 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3049 } 3050 PetscFunctionReturn(0); 3051 } 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3055 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3056 { 3057 PetscErrorCode ierr; 3058 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3059 PetscScalar *array; 3060 Vec from,to; 3061 3062 PetscFunctionBegin; 3063 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3064 from = pcbddc->coarse_vec; 3065 to = pcbddc->vec1_P; 3066 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3067 Vec tvec; 3068 3069 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3070 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3071 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3072 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3073 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3074 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3075 } 3076 } else { /* from local to global -> put data in coarse right hand side */ 3077 from = pcbddc->vec1_P; 3078 to = pcbddc->coarse_vec; 3079 } 3080 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 #undef __FUNCT__ 3085 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3086 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3087 { 3088 PetscErrorCode ierr; 3089 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3090 PetscScalar *array; 3091 Vec from,to; 3092 3093 PetscFunctionBegin; 3094 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3095 from = pcbddc->coarse_vec; 3096 to = pcbddc->vec1_P; 3097 } else { /* from local to global -> put data in coarse right hand side */ 3098 from = pcbddc->vec1_P; 3099 to = pcbddc->coarse_vec; 3100 } 3101 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3102 if (smode == SCATTER_FORWARD) { 3103 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3104 Vec tvec; 3105 3106 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3107 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3108 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3109 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3110 } 3111 } else { 3112 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3113 ierr = VecResetArray(from);CHKERRQ(ierr); 3114 } 3115 } 3116 PetscFunctionReturn(0); 3117 } 3118 3119 /* uncomment for testing purposes */ 3120 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3123 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3124 { 3125 PetscErrorCode ierr; 3126 PC_IS* pcis = (PC_IS*)(pc->data); 3127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3128 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3129 /* one and zero */ 3130 PetscScalar one=1.0,zero=0.0; 3131 /* space to store constraints and their local indices */ 3132 PetscScalar *constraints_data; 3133 PetscInt *constraints_idxs,*constraints_idxs_B; 3134 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3135 PetscInt *constraints_n; 3136 /* iterators */ 3137 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3138 /* BLAS integers */ 3139 PetscBLASInt lwork,lierr; 3140 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3141 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3142 /* reuse */ 3143 PetscInt olocal_primal_size,olocal_primal_size_cc; 3144 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3145 /* change of basis */ 3146 PetscBool qr_needed; 3147 PetscBT change_basis,qr_needed_idx; 3148 /* auxiliary stuff */ 3149 PetscInt *nnz,*is_indices; 3150 PetscInt ncc; 3151 /* some quantities */ 3152 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3153 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3154 3155 PetscFunctionBegin; 3156 /* Destroy Mat objects computed previously */ 3157 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3158 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3159 /* save info on constraints from previous setup (if any) */ 3160 olocal_primal_size = pcbddc->local_primal_size; 3161 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3162 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3163 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3164 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3165 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3166 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3167 3168 /* print some info */ 3169 if (pcbddc->dbg_flag) { 3170 IS vertices; 3171 PetscInt nv,nedges,nfaces; 3172 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3173 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3174 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3175 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3177 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3181 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3182 } 3183 3184 if (!pcbddc->adaptive_selection) { 3185 IS ISForVertices,*ISForFaces,*ISForEdges; 3186 MatNullSpace nearnullsp; 3187 const Vec *nearnullvecs; 3188 Vec *localnearnullsp; 3189 PetscScalar *array; 3190 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3191 PetscBool nnsp_has_cnst; 3192 /* LAPACK working arrays for SVD or POD */ 3193 PetscBool skip_lapack,boolforchange; 3194 PetscScalar *work; 3195 PetscReal *singular_vals; 3196 #if defined(PETSC_USE_COMPLEX) 3197 PetscReal *rwork; 3198 #endif 3199 #if defined(PETSC_MISSING_LAPACK_GESVD) 3200 PetscScalar *temp_basis,*correlation_mat; 3201 #else 3202 PetscBLASInt dummy_int=1; 3203 PetscScalar dummy_scalar=1.; 3204 #endif 3205 3206 /* Get index sets for faces, edges and vertices from graph */ 3207 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3208 /* free unneeded index sets */ 3209 if (!pcbddc->use_vertices) { 3210 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3211 } 3212 if (!pcbddc->use_edges) { 3213 for (i=0;i<n_ISForEdges;i++) { 3214 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3215 } 3216 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3217 n_ISForEdges = 0; 3218 } 3219 if (!pcbddc->use_faces) { 3220 for (i=0;i<n_ISForFaces;i++) { 3221 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3222 } 3223 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3224 n_ISForFaces = 0; 3225 } 3226 3227 #if defined(PETSC_USE_DEBUG) 3228 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3229 Also use_change_of_basis should be consistent among processors */ 3230 if (pcbddc->NullSpace) { 3231 PetscBool tbool[2],gbool[2]; 3232 3233 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3234 pcbddc->use_change_of_basis = PETSC_TRUE; 3235 if (!ISForEdges) { 3236 pcbddc->use_change_on_faces = PETSC_TRUE; 3237 } 3238 } 3239 tbool[0] = pcbddc->use_change_of_basis; 3240 tbool[1] = pcbddc->use_change_on_faces; 3241 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3242 pcbddc->use_change_of_basis = gbool[0]; 3243 pcbddc->use_change_on_faces = gbool[1]; 3244 } 3245 #endif 3246 3247 /* check if near null space is attached to global mat */ 3248 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3249 if (nearnullsp) { 3250 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3251 /* remove any stored info */ 3252 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3253 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3254 /* store information for BDDC solver reuse */ 3255 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3256 pcbddc->onearnullspace = nearnullsp; 3257 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3258 for (i=0;i<nnsp_size;i++) { 3259 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3260 } 3261 } else { /* if near null space is not provided BDDC uses constants by default */ 3262 nnsp_size = 0; 3263 nnsp_has_cnst = PETSC_TRUE; 3264 } 3265 /* get max number of constraints on a single cc */ 3266 max_constraints = nnsp_size; 3267 if (nnsp_has_cnst) max_constraints++; 3268 3269 /* 3270 Evaluate maximum storage size needed by the procedure 3271 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3272 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3273 There can be multiple constraints per connected component 3274 */ 3275 n_vertices = 0; 3276 if (ISForVertices) { 3277 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3278 } 3279 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3280 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3281 3282 total_counts = n_ISForFaces+n_ISForEdges; 3283 total_counts *= max_constraints; 3284 total_counts += n_vertices; 3285 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3286 3287 total_counts = 0; 3288 max_size_of_constraint = 0; 3289 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3290 IS used_is; 3291 if (i<n_ISForEdges) { 3292 used_is = ISForEdges[i]; 3293 } else { 3294 used_is = ISForFaces[i-n_ISForEdges]; 3295 } 3296 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3297 total_counts += j; 3298 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3299 } 3300 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3301 3302 /* get local part of global near null space vectors */ 3303 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3304 for (k=0;k<nnsp_size;k++) { 3305 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3306 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3307 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3308 } 3309 3310 /* whether or not to skip lapack calls */ 3311 skip_lapack = PETSC_TRUE; 3312 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3313 3314 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3315 if (!skip_lapack) { 3316 PetscScalar temp_work; 3317 3318 #if defined(PETSC_MISSING_LAPACK_GESVD) 3319 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3320 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3321 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3322 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3323 #if defined(PETSC_USE_COMPLEX) 3324 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3325 #endif 3326 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3327 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3328 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3329 lwork = -1; 3330 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3331 #if !defined(PETSC_USE_COMPLEX) 3332 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3333 #else 3334 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3335 #endif 3336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3337 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3338 #else /* on missing GESVD */ 3339 /* SVD */ 3340 PetscInt max_n,min_n; 3341 max_n = max_size_of_constraint; 3342 min_n = max_constraints; 3343 if (max_size_of_constraint < max_constraints) { 3344 min_n = max_size_of_constraint; 3345 max_n = max_constraints; 3346 } 3347 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3348 #if defined(PETSC_USE_COMPLEX) 3349 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3350 #endif 3351 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3352 lwork = -1; 3353 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3354 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3355 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3356 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3357 #if !defined(PETSC_USE_COMPLEX) 3358 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3359 #else 3360 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3361 #endif 3362 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3363 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3364 #endif /* on missing GESVD */ 3365 /* Allocate optimal workspace */ 3366 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3368 } 3369 /* Now we can loop on constraining sets */ 3370 total_counts = 0; 3371 constraints_idxs_ptr[0] = 0; 3372 constraints_data_ptr[0] = 0; 3373 /* vertices */ 3374 if (n_vertices) { 3375 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3376 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3377 for (i=0;i<n_vertices;i++) { 3378 constraints_n[total_counts] = 1; 3379 constraints_data[total_counts] = 1.0; 3380 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3381 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3382 total_counts++; 3383 } 3384 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3385 n_vertices = total_counts; 3386 } 3387 3388 /* edges and faces */ 3389 total_counts_cc = total_counts; 3390 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3391 IS used_is; 3392 PetscBool idxs_copied = PETSC_FALSE; 3393 3394 if (ncc<n_ISForEdges) { 3395 used_is = ISForEdges[ncc]; 3396 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3397 } else { 3398 used_is = ISForFaces[ncc-n_ISForEdges]; 3399 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3400 } 3401 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3402 3403 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3404 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3405 /* change of basis should not be performed on local periodic nodes */ 3406 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3407 if (nnsp_has_cnst) { 3408 PetscScalar quad_value; 3409 3410 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3411 idxs_copied = PETSC_TRUE; 3412 3413 if (!pcbddc->use_nnsp_true) { 3414 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3415 } else { 3416 quad_value = 1.0; 3417 } 3418 for (j=0;j<size_of_constraint;j++) { 3419 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3420 } 3421 temp_constraints++; 3422 total_counts++; 3423 } 3424 for (k=0;k<nnsp_size;k++) { 3425 PetscReal real_value; 3426 PetscScalar *ptr_to_data; 3427 3428 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3429 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3430 for (j=0;j<size_of_constraint;j++) { 3431 ptr_to_data[j] = array[is_indices[j]]; 3432 } 3433 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3434 /* check if array is null on the connected component */ 3435 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3436 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3437 if (real_value > 0.0) { /* keep indices and values */ 3438 temp_constraints++; 3439 total_counts++; 3440 if (!idxs_copied) { 3441 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3442 idxs_copied = PETSC_TRUE; 3443 } 3444 } 3445 } 3446 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3447 valid_constraints = temp_constraints; 3448 if (!pcbddc->use_nnsp_true && temp_constraints) { 3449 if (temp_constraints == 1) { /* just normalize the constraint */ 3450 PetscScalar norm,*ptr_to_data; 3451 3452 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3454 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3455 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3456 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3457 } else { /* perform SVD */ 3458 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3459 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3460 3461 #if defined(PETSC_MISSING_LAPACK_GESVD) 3462 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3463 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3464 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3465 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3466 from that computed using LAPACKgesvd 3467 -> This is due to a different computation of eigenvectors in LAPACKheev 3468 -> The quality of the POD-computed basis will be the same */ 3469 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3470 /* Store upper triangular part of correlation matrix */ 3471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3472 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3473 for (j=0;j<temp_constraints;j++) { 3474 for (k=0;k<j+1;k++) { 3475 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3476 } 3477 } 3478 /* compute eigenvalues and eigenvectors of correlation matrix */ 3479 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3480 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3481 #if !defined(PETSC_USE_COMPLEX) 3482 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3483 #else 3484 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3485 #endif 3486 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3488 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3489 j = 0; 3490 while (j < temp_constraints && singular_vals[j] < tol) j++; 3491 total_counts = total_counts-j; 3492 valid_constraints = temp_constraints-j; 3493 /* scale and copy POD basis into used quadrature memory */ 3494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3495 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3496 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3500 if (j<temp_constraints) { 3501 PetscInt ii; 3502 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3504 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3506 for (k=0;k<temp_constraints-j;k++) { 3507 for (ii=0;ii<size_of_constraint;ii++) { 3508 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3509 } 3510 } 3511 } 3512 #else /* on missing GESVD */ 3513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3517 #if !defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3521 #endif 3522 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3523 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3524 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3525 k = temp_constraints; 3526 if (k > size_of_constraint) k = size_of_constraint; 3527 j = 0; 3528 while (j < k && singular_vals[k-j-1] < tol) j++; 3529 valid_constraints = k-j; 3530 total_counts = total_counts-temp_constraints+valid_constraints; 3531 #endif /* on missing GESVD */ 3532 } 3533 } 3534 /* update pointers information */ 3535 if (valid_constraints) { 3536 constraints_n[total_counts_cc] = valid_constraints; 3537 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3538 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3539 /* set change_of_basis flag */ 3540 if (boolforchange) { 3541 PetscBTSet(change_basis,total_counts_cc); 3542 } 3543 total_counts_cc++; 3544 } 3545 } 3546 /* free workspace */ 3547 if (!skip_lapack) { 3548 ierr = PetscFree(work);CHKERRQ(ierr); 3549 #if defined(PETSC_USE_COMPLEX) 3550 ierr = PetscFree(rwork);CHKERRQ(ierr); 3551 #endif 3552 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3553 #if defined(PETSC_MISSING_LAPACK_GESVD) 3554 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3555 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3556 #endif 3557 } 3558 for (k=0;k<nnsp_size;k++) { 3559 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3560 } 3561 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3562 /* free index sets of faces, edges and vertices */ 3563 for (i=0;i<n_ISForFaces;i++) { 3564 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3565 } 3566 if (n_ISForFaces) { 3567 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3568 } 3569 for (i=0;i<n_ISForEdges;i++) { 3570 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3571 } 3572 if (n_ISForEdges) { 3573 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3574 } 3575 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3576 } else { 3577 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3578 3579 total_counts = 0; 3580 n_vertices = 0; 3581 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3582 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3583 } 3584 max_constraints = 0; 3585 total_counts_cc = 0; 3586 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3587 total_counts += pcbddc->adaptive_constraints_n[i]; 3588 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3589 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3590 } 3591 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3592 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3593 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3594 constraints_data = pcbddc->adaptive_constraints_data; 3595 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3596 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3597 total_counts_cc = 0; 3598 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3599 if (pcbddc->adaptive_constraints_n[i]) { 3600 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3601 } 3602 } 3603 #if 0 3604 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3605 for (i=0;i<total_counts_cc;i++) { 3606 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3607 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3608 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3609 printf(" %d",constraints_idxs[j]); 3610 } 3611 printf("\n"); 3612 printf("number of cc: %d\n",constraints_n[i]); 3613 } 3614 for (i=0;i<n_vertices;i++) { 3615 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3616 } 3617 for (i=0;i<sub_schurs->n_subs;i++) { 3618 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3619 } 3620 #endif 3621 3622 max_size_of_constraint = 0; 3623 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3624 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3625 /* Change of basis */ 3626 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3627 if (pcbddc->use_change_of_basis) { 3628 for (i=0;i<sub_schurs->n_subs;i++) { 3629 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3630 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3631 } 3632 } 3633 } 3634 } 3635 pcbddc->local_primal_size = total_counts; 3636 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3637 3638 /* map constraints_idxs in boundary numbering */ 3639 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3640 if (i != constraints_idxs_ptr[total_counts_cc]) { 3641 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3642 } 3643 3644 /* Create constraint matrix */ 3645 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3646 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3647 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3648 3649 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3650 /* determine if a QR strategy is needed for change of basis */ 3651 qr_needed = PETSC_FALSE; 3652 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3653 total_primal_vertices=0; 3654 pcbddc->local_primal_size_cc = 0; 3655 for (i=0;i<total_counts_cc;i++) { 3656 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3657 if (size_of_constraint == 1) { 3658 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3659 pcbddc->local_primal_size_cc += 1; 3660 } else if (PetscBTLookup(change_basis,i)) { 3661 for (k=0;k<constraints_n[i];k++) { 3662 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3663 } 3664 pcbddc->local_primal_size_cc += constraints_n[i]; 3665 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3666 PetscBTSet(qr_needed_idx,i); 3667 qr_needed = PETSC_TRUE; 3668 } 3669 } else { 3670 pcbddc->local_primal_size_cc += 1; 3671 } 3672 } 3673 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3674 pcbddc->n_vertices = total_primal_vertices; 3675 /* permute indices in order to have a sorted set of vertices */ 3676 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3677 3678 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3679 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3680 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3681 3682 /* nonzero structure of constraint matrix */ 3683 /* and get reference dof for local constraints */ 3684 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3685 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3686 3687 j = total_primal_vertices; 3688 total_counts = total_primal_vertices; 3689 cum = total_primal_vertices; 3690 for (i=n_vertices;i<total_counts_cc;i++) { 3691 if (!PetscBTLookup(change_basis,i)) { 3692 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3693 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3694 cum++; 3695 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3696 for (k=0;k<constraints_n[i];k++) { 3697 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3698 nnz[j+k] = size_of_constraint; 3699 } 3700 j += constraints_n[i]; 3701 } 3702 } 3703 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3704 ierr = PetscFree(nnz);CHKERRQ(ierr); 3705 3706 /* set values in constraint matrix */ 3707 for (i=0;i<total_primal_vertices;i++) { 3708 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3709 } 3710 total_counts = total_primal_vertices; 3711 for (i=n_vertices;i<total_counts_cc;i++) { 3712 if (!PetscBTLookup(change_basis,i)) { 3713 PetscInt *cols; 3714 3715 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3716 cols = constraints_idxs+constraints_idxs_ptr[i]; 3717 for (k=0;k<constraints_n[i];k++) { 3718 PetscInt row = total_counts+k; 3719 PetscScalar *vals; 3720 3721 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3722 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3723 } 3724 total_counts += constraints_n[i]; 3725 } 3726 } 3727 /* assembling */ 3728 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3729 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3730 3731 /* 3732 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3733 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3734 */ 3735 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3736 if (pcbddc->use_change_of_basis) { 3737 /* dual and primal dofs on a single cc */ 3738 PetscInt dual_dofs,primal_dofs; 3739 /* working stuff for GEQRF */ 3740 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3741 PetscBLASInt lqr_work; 3742 /* working stuff for UNGQR */ 3743 PetscScalar *gqr_work,lgqr_work_t; 3744 PetscBLASInt lgqr_work; 3745 /* working stuff for TRTRS */ 3746 PetscScalar *trs_rhs; 3747 PetscBLASInt Blas_NRHS; 3748 /* pointers for values insertion into change of basis matrix */ 3749 PetscInt *start_rows,*start_cols; 3750 PetscScalar *start_vals; 3751 /* working stuff for values insertion */ 3752 PetscBT is_primal; 3753 PetscInt *aux_primal_numbering_B; 3754 /* matrix sizes */ 3755 PetscInt global_size,local_size; 3756 /* temporary change of basis */ 3757 Mat localChangeOfBasisMatrix; 3758 /* extra space for debugging */ 3759 PetscScalar *dbg_work; 3760 3761 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3762 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3763 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3764 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3765 /* nonzeros for local mat */ 3766 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3767 for (i=0;i<pcis->n;i++) nnz[i]=1; 3768 for (i=n_vertices;i<total_counts_cc;i++) { 3769 if (PetscBTLookup(change_basis,i)) { 3770 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3771 if (PetscBTLookup(qr_needed_idx,i)) { 3772 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3773 } else { 3774 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3775 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3776 } 3777 } 3778 } 3779 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3780 ierr = PetscFree(nnz);CHKERRQ(ierr); 3781 /* Set initial identity in the matrix */ 3782 for (i=0;i<pcis->n;i++) { 3783 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3784 } 3785 3786 if (pcbddc->dbg_flag) { 3787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3789 } 3790 3791 3792 /* Now we loop on the constraints which need a change of basis */ 3793 /* 3794 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3795 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3796 3797 Basic blocks of change of basis matrix T computed by 3798 3799 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3800 3801 | 1 0 ... 0 s_1/S | 3802 | 0 1 ... 0 s_2/S | 3803 | ... | 3804 | 0 ... 1 s_{n-1}/S | 3805 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3806 3807 with S = \sum_{i=1}^n s_i^2 3808 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3809 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3810 3811 - QR decomposition of constraints otherwise 3812 */ 3813 if (qr_needed) { 3814 /* space to store Q */ 3815 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3816 /* first we issue queries for optimal work */ 3817 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3818 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3819 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3820 lqr_work = -1; 3821 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3822 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3823 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3824 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3825 lgqr_work = -1; 3826 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3827 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3828 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3829 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3830 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3831 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3832 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3833 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3834 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3835 /* array to store scaling factors for reflectors */ 3836 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3837 /* array to store rhs and solution of triangular solver */ 3838 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3839 /* allocating workspace for check */ 3840 if (pcbddc->dbg_flag) { 3841 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3842 } 3843 } 3844 /* array to store whether a node is primal or not */ 3845 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3846 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3847 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3848 if (i != total_primal_vertices) { 3849 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3850 } 3851 for (i=0;i<total_primal_vertices;i++) { 3852 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3853 } 3854 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3855 3856 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3857 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3858 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3859 if (PetscBTLookup(change_basis,total_counts)) { 3860 /* get constraint info */ 3861 primal_dofs = constraints_n[total_counts]; 3862 dual_dofs = size_of_constraint-primal_dofs; 3863 3864 if (pcbddc->dbg_flag) { 3865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3866 } 3867 3868 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3869 3870 /* copy quadrature constraints for change of basis check */ 3871 if (pcbddc->dbg_flag) { 3872 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3873 } 3874 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3875 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3876 3877 /* compute QR decomposition of constraints */ 3878 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3879 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3881 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3882 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3883 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3884 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3885 3886 /* explictly compute R^-T */ 3887 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3888 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3889 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3890 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3891 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3892 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3894 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3895 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3896 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3897 3898 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3901 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3904 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3907 3908 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3909 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3910 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3911 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3912 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3913 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3914 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3915 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3916 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3917 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3918 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3919 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3920 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3921 3922 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3923 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3924 /* insert cols for primal dofs */ 3925 for (j=0;j<primal_dofs;j++) { 3926 start_vals = &qr_basis[j*size_of_constraint]; 3927 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3928 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3929 } 3930 /* insert cols for dual dofs */ 3931 for (j=0,k=0;j<dual_dofs;k++) { 3932 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3933 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3934 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3935 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3936 j++; 3937 } 3938 } 3939 3940 /* check change of basis */ 3941 if (pcbddc->dbg_flag) { 3942 PetscInt ii,jj; 3943 PetscBool valid_qr=PETSC_TRUE; 3944 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3948 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3949 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3950 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3951 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3953 for (jj=0;jj<size_of_constraint;jj++) { 3954 for (ii=0;ii<primal_dofs;ii++) { 3955 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3956 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3957 } 3958 } 3959 if (!valid_qr) { 3960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3961 for (jj=0;jj<size_of_constraint;jj++) { 3962 for (ii=0;ii<primal_dofs;ii++) { 3963 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3964 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3965 } 3966 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3967 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3968 } 3969 } 3970 } 3971 } else { 3972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3973 } 3974 } 3975 } else { /* simple transformation block */ 3976 PetscInt row,col; 3977 PetscScalar val,norm; 3978 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3980 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3981 for (j=0;j<size_of_constraint;j++) { 3982 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3983 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3984 if (!PetscBTLookup(is_primal,row_B)) { 3985 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3986 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3987 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3988 } else { 3989 for (k=0;k<size_of_constraint;k++) { 3990 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3991 if (row != col) { 3992 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3993 } else { 3994 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3995 } 3996 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3997 } 3998 } 3999 } 4000 if (pcbddc->dbg_flag) { 4001 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4002 } 4003 } 4004 } else { 4005 if (pcbddc->dbg_flag) { 4006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4007 } 4008 } 4009 } 4010 4011 /* free workspace */ 4012 if (qr_needed) { 4013 if (pcbddc->dbg_flag) { 4014 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4015 } 4016 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4017 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4018 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4019 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4020 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4021 } 4022 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4023 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4024 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4025 4026 /* assembling of global change of variable */ 4027 { 4028 Mat tmat; 4029 PetscInt bs; 4030 4031 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4032 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4033 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4034 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4035 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4036 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4037 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4038 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4039 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4040 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4041 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4042 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4043 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4044 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4045 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4046 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4047 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4048 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4049 } 4050 /* check */ 4051 if (pcbddc->dbg_flag) { 4052 PetscReal error; 4053 Vec x,x_change; 4054 4055 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4056 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4057 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4058 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4059 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4060 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4061 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4062 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4063 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4064 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4065 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4066 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4069 ierr = VecDestroy(&x);CHKERRQ(ierr); 4070 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4071 } 4072 4073 /* adapt sub_schurs computed (if any) */ 4074 if (pcbddc->use_deluxe_scaling) { 4075 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4076 if (sub_schurs->S_Ej_all) { 4077 Mat S_new,tmat; 4078 ISLocalToGlobalMapping NtoSall; 4079 IS is_all_N,is_V,is_V_Sall; 4080 const PetscScalar *array; 4081 const PetscInt *idxs_V,*idxs_all; 4082 PetscInt i,n_V; 4083 4084 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4085 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4086 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4087 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4088 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4089 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4090 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4091 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4092 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4093 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4094 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4095 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4096 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4097 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4098 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4099 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4100 for (i=0;i<n_V;i++) { 4101 PetscScalar val; 4102 PetscInt idx; 4103 4104 idx = idxs_V[i]; 4105 val = array[idxs_all[idxs_V[i]]]; 4106 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4107 } 4108 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4109 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4110 sub_schurs->S_Ej_all = S_new; 4111 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4112 if (sub_schurs->sum_S_Ej_all) { 4113 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4114 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4115 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4116 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4117 sub_schurs->sum_S_Ej_all = S_new; 4118 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4119 } 4120 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4121 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4122 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4123 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4124 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4125 } 4126 } 4127 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4128 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4129 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4130 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4131 } 4132 4133 /* set up change of basis context */ 4134 if (pcbddc->ChangeOfBasisMatrix) { 4135 PCBDDCChange_ctx change_ctx; 4136 4137 if (!pcbddc->new_global_mat) { 4138 PetscInt global_size,local_size; 4139 4140 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4141 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4142 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4143 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4144 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4145 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4146 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4147 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4148 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4149 } else { 4150 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4151 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4152 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4153 } 4154 if (!pcbddc->user_ChangeOfBasisMatrix) { 4155 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4156 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4157 } else { 4158 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4159 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4160 } 4161 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4162 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4163 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4164 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4165 } else { 4166 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4167 } 4168 4169 /* add pressure dofs to set of primal nodes for numbering purposes */ 4170 for (i=0;i<pcbddc->benign_n;i++) { 4171 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4172 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4173 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4174 pcbddc->local_primal_size_cc++; 4175 pcbddc->local_primal_size++; 4176 } 4177 4178 /* check if a new primal space has been introduced (also take into account benign trick) */ 4179 pcbddc->new_primal_space_local = PETSC_TRUE; 4180 if (olocal_primal_size == pcbddc->local_primal_size) { 4181 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4182 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4183 if (!pcbddc->new_primal_space_local) { 4184 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4185 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4186 } 4187 } 4188 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4189 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4190 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4191 4192 /* flush dbg viewer */ 4193 if (pcbddc->dbg_flag) { 4194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4195 } 4196 4197 /* free workspace */ 4198 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4199 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4200 if (!pcbddc->adaptive_selection) { 4201 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4202 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4203 } else { 4204 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4205 pcbddc->adaptive_constraints_idxs_ptr, 4206 pcbddc->adaptive_constraints_data_ptr, 4207 pcbddc->adaptive_constraints_idxs, 4208 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4209 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4210 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4211 } 4212 PetscFunctionReturn(0); 4213 } 4214 4215 #undef __FUNCT__ 4216 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4217 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4218 { 4219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4220 PC_IS *pcis = (PC_IS*)pc->data; 4221 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4222 PetscInt ierr,i,vertex_size,N; 4223 PetscViewer viewer=pcbddc->dbg_viewer; 4224 4225 PetscFunctionBegin; 4226 /* Reset previously computed graph */ 4227 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4228 /* Init local Graph struct */ 4229 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4230 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4231 4232 /* Check validity of the csr graph passed in by the user */ 4233 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4234 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4235 } 4236 4237 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4238 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 4239 PetscInt *xadj,*adjncy; 4240 PetscInt nvtxs; 4241 PetscBool flg_row=PETSC_FALSE; 4242 4243 if (pcbddc->use_local_adj) { 4244 4245 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4246 if (flg_row) { 4247 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4248 pcbddc->computed_rowadj = PETSC_TRUE; 4249 } 4250 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4251 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 4252 IS is_dummy; 4253 ISLocalToGlobalMapping l2gmap_dummy; 4254 PetscInt j,sum; 4255 PetscInt *cxadj,*cadjncy; 4256 const PetscInt *idxs; 4257 PCBDDCGraph graph; 4258 PetscBT is_on_boundary; 4259 4260 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 4261 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 4262 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4263 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4264 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 4265 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 4266 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4267 if (flg_row) { 4268 graph->xadj = xadj; 4269 graph->adjncy = adjncy; 4270 } 4271 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 4272 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4273 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4274 4275 if (pcbddc->dbg_flag) { 4276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 4277 for (i=0;i<graph->ncc;i++) { 4278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 4279 } 4280 } 4281 4282 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 4283 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4284 for (i=0;i<pcis->n_B;i++) { 4285 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 4286 } 4287 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 4288 4289 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 4290 sum = 0; 4291 for (i=0;i<graph->ncc;i++) { 4292 PetscInt sizecc = 0; 4293 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4294 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4295 sizecc++; 4296 } 4297 } 4298 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4299 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4300 cxadj[graph->queue[j]] = sizecc; 4301 } 4302 } 4303 sum += sizecc*sizecc; 4304 } 4305 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 4306 sum = 0; 4307 for (i=0;i<pcis->n;i++) { 4308 PetscInt temp = cxadj[i]; 4309 cxadj[i] = sum; 4310 sum += temp; 4311 } 4312 cxadj[pcis->n] = sum; 4313 for (i=0;i<graph->ncc;i++) { 4314 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 4315 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 4316 PetscInt k,sizecc = 0; 4317 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 4318 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 4319 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 4320 sizecc++; 4321 } 4322 } 4323 } 4324 } 4325 } 4326 if (sum) { 4327 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 4328 } else { 4329 ierr = PetscFree(cxadj);CHKERRQ(ierr); 4330 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 4331 } 4332 graph->xadj = 0; 4333 graph->adjncy = 0; 4334 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4335 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 4336 } 4337 } 4338 if (pcbddc->dbg_flag) { 4339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4340 } 4341 4342 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4343 vertex_size = 1; 4344 if (pcbddc->user_provided_isfordofs) { 4345 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4346 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4347 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4348 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4349 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4350 } 4351 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4352 pcbddc->n_ISForDofs = 0; 4353 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4354 } 4355 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4356 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4357 } else { 4358 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4359 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4360 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4361 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4362 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4363 } 4364 } 4365 } 4366 4367 /* Setup of Graph */ 4368 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4369 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4370 } 4371 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4372 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4373 } 4374 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4375 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4376 } 4377 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4378 4379 /* attach info on disconnected subdomains if present */ 4380 if (pcbddc->n_local_subs) { 4381 PetscInt *local_subs; 4382 4383 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4384 for (i=0;i<pcbddc->n_local_subs;i++) { 4385 const PetscInt *idxs; 4386 PetscInt nl,j; 4387 4388 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4389 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4390 for (j=0;j<nl;j++) { 4391 local_subs[idxs[j]] = i; 4392 } 4393 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4394 } 4395 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4396 pcbddc->mat_graph->local_subs = local_subs; 4397 } 4398 4399 /* Graph's connected components analysis */ 4400 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4401 4402 /* print some info to stdout */ 4403 if (pcbddc->dbg_flag) { 4404 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4405 } 4406 4407 /* mark topography has done */ 4408 pcbddc->recompute_topography = PETSC_FALSE; 4409 PetscFunctionReturn(0); 4410 } 4411 4412 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "PCBDDCSubsetNumbering" 4415 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4416 { 4417 PetscSF sf; 4418 PetscLayout map; 4419 const PetscInt *idxs; 4420 PetscInt *leaf_data,*root_data,*gidxs; 4421 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4422 PetscInt n_n,nlocals,start,first_index; 4423 PetscMPIInt commsize; 4424 PetscBool first_found; 4425 PetscErrorCode ierr; 4426 4427 PetscFunctionBegin; 4428 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4429 if (subset_mult) { 4430 PetscCheckSameComm(subset,1,subset_mult,2); 4431 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4432 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4433 } 4434 /* create workspace layout for computing global indices of subset */ 4435 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4436 lbounds[0] = lbounds[1] = 0; 4437 for (i=0;i<n;i++) { 4438 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4439 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4440 } 4441 lbounds[0] = -lbounds[0]; 4442 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4443 gbounds[0] = -gbounds[0]; 4444 N = gbounds[1] - gbounds[0] + 1; 4445 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4446 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4447 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4448 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4449 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4450 4451 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4452 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4453 if (subset_mult) { 4454 const PetscInt* idxs_mult; 4455 4456 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4457 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4458 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4459 } else { 4460 for (i=0;i<n;i++) leaf_data[i] = 1; 4461 } 4462 /* local size of new subset */ 4463 n_n = 0; 4464 for (i=0;i<n;i++) n_n += leaf_data[i]; 4465 4466 /* global indexes in layout */ 4467 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4468 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4469 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4470 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4471 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4472 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4473 4474 /* reduce from leaves to roots */ 4475 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4476 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4477 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4478 4479 /* count indexes in local part of layout */ 4480 nlocals = 0; 4481 first_index = -1; 4482 first_found = PETSC_FALSE; 4483 for (i=0;i<Nl;i++) { 4484 if (!first_found && root_data[i]) { 4485 first_found = PETSC_TRUE; 4486 first_index = i; 4487 } 4488 nlocals += root_data[i]; 4489 } 4490 4491 /* cumulative of number of indexes and size of subset without holes */ 4492 #if defined(PETSC_HAVE_MPI_EXSCAN) 4493 start = 0; 4494 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4495 #else 4496 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4497 start = start-nlocals; 4498 #endif 4499 4500 if (N_n) { /* compute total size of new subset if requested */ 4501 *N_n = start + nlocals; 4502 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4503 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4504 } 4505 4506 /* adapt root data with cumulative */ 4507 if (first_found) { 4508 PetscInt old_index; 4509 4510 root_data[first_index] += start; 4511 old_index = first_index; 4512 for (i=first_index+1;i<Nl;i++) { 4513 if (root_data[i]) { 4514 root_data[i] += root_data[old_index]; 4515 old_index = i; 4516 } 4517 } 4518 } 4519 4520 /* from roots to leaves */ 4521 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4522 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4523 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4524 4525 /* create new IS with global indexes without holes */ 4526 if (subset_mult) { 4527 const PetscInt* idxs_mult; 4528 PetscInt cum; 4529 4530 cum = 0; 4531 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4532 for (i=0;i<n;i++) { 4533 PetscInt j; 4534 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4535 } 4536 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4537 } else { 4538 for (i=0;i<n;i++) { 4539 gidxs[i] = leaf_data[i]-1; 4540 } 4541 } 4542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4543 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4544 PetscFunctionReturn(0); 4545 } 4546 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4549 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4550 { 4551 PetscInt i,j; 4552 PetscScalar *alphas; 4553 PetscErrorCode ierr; 4554 4555 PetscFunctionBegin; 4556 /* this implements stabilized Gram-Schmidt */ 4557 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4558 for (i=0;i<n;i++) { 4559 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4560 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4561 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4562 } 4563 ierr = PetscFree(alphas);CHKERRQ(ierr); 4564 PetscFunctionReturn(0); 4565 } 4566 4567 #undef __FUNCT__ 4568 #define __FUNCT__ "MatISGetSubassemblingPattern" 4569 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4570 { 4571 IS ranks_send_to; 4572 PetscInt n_neighs,*neighs,*n_shared,**shared; 4573 PetscMPIInt size,rank,color; 4574 PetscInt *xadj,*adjncy; 4575 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4576 PetscInt i,local_size,threshold=0; 4577 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4578 PetscSubcomm subcomm; 4579 PetscErrorCode ierr; 4580 4581 PetscFunctionBegin; 4582 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4583 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4584 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4585 4586 /* Get info on mapping */ 4587 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4588 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4589 4590 /* build local CSR graph of subdomains' connectivity */ 4591 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4592 xadj[0] = 0; 4593 xadj[1] = PetscMax(n_neighs-1,0); 4594 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4595 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4596 4597 if (threshold) { 4598 PetscInt xadj_count = 0; 4599 for (i=1;i<n_neighs;i++) { 4600 if (n_shared[i] > threshold) { 4601 adjncy[xadj_count] = neighs[i]; 4602 adjncy_wgt[xadj_count] = n_shared[i]; 4603 xadj_count++; 4604 } 4605 } 4606 xadj[1] = xadj_count; 4607 } else { 4608 if (xadj[1]) { 4609 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4610 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4611 } 4612 } 4613 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4614 if (use_square) { 4615 for (i=0;i<xadj[1];i++) { 4616 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4617 } 4618 } 4619 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4620 4621 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4622 4623 /* 4624 Restrict work on active processes only. 4625 */ 4626 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4627 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4628 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4629 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4630 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4631 if (color) { 4632 ierr = PetscFree(xadj);CHKERRQ(ierr); 4633 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4634 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4635 } else { 4636 Mat subdomain_adj; 4637 IS new_ranks,new_ranks_contig; 4638 MatPartitioning partitioner; 4639 PetscInt prank,rstart=0,rend=0; 4640 PetscInt *is_indices,*oldranks; 4641 PetscBool aggregate; 4642 4643 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4644 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4645 prank = rank; 4646 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4647 /* 4648 for (i=0;i<size;i++) { 4649 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4650 } 4651 */ 4652 for (i=0;i<xadj[1];i++) { 4653 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4654 } 4655 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4656 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4657 if (aggregate) { 4658 PetscInt lrows,row,ncols,*cols; 4659 PetscMPIInt nrank; 4660 PetscScalar *vals; 4661 4662 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4663 lrows = 0; 4664 if (nrank<redprocs) { 4665 lrows = size/redprocs; 4666 if (nrank<size%redprocs) lrows++; 4667 } 4668 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4669 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4670 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4671 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4672 row = nrank; 4673 ncols = xadj[1]-xadj[0]; 4674 cols = adjncy; 4675 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4676 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4677 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4678 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4679 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4680 ierr = PetscFree(xadj);CHKERRQ(ierr); 4681 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4682 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4683 ierr = PetscFree(vals);CHKERRQ(ierr); 4684 } else { 4685 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4686 } 4687 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4688 4689 /* Partition */ 4690 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4691 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4692 if (use_vwgt) { 4693 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4694 v_wgt[0] = local_size; 4695 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4696 } 4697 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4698 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4699 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4700 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4701 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4702 4703 /* renumber new_ranks to avoid "holes" in new set of processors */ 4704 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4705 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4706 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4707 if (!redprocs) { 4708 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4709 } else { 4710 PetscInt idxs[1]; 4711 PetscMPIInt tag; 4712 MPI_Request *reqs; 4713 4714 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4715 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4716 for (i=rstart;i<rend;i++) { 4717 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4718 } 4719 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4720 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4721 ierr = PetscFree(reqs);CHKERRQ(ierr); 4722 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4723 } 4724 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4725 /* clean up */ 4726 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4727 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4728 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4729 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4730 } 4731 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4732 4733 /* assemble parallel IS for sends */ 4734 i = 1; 4735 if (color) i=0; 4736 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4737 /* get back IS */ 4738 *is_sends = ranks_send_to; 4739 PetscFunctionReturn(0); 4740 } 4741 4742 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4743 4744 #undef __FUNCT__ 4745 #define __FUNCT__ "MatISSubassemble" 4746 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4747 { 4748 Mat local_mat; 4749 IS is_sends_internal; 4750 PetscInt rows,cols,new_local_rows; 4751 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4752 PetscBool ismatis,isdense,newisdense,destroy_mat; 4753 ISLocalToGlobalMapping l2gmap; 4754 PetscInt* l2gmap_indices; 4755 const PetscInt* is_indices; 4756 MatType new_local_type; 4757 /* buffers */ 4758 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4759 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4760 PetscInt *recv_buffer_idxs_local; 4761 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4762 /* MPI */ 4763 MPI_Comm comm,comm_n; 4764 PetscSubcomm subcomm; 4765 PetscMPIInt n_sends,n_recvs,commsize; 4766 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4767 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4768 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4769 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4770 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4771 PetscErrorCode ierr; 4772 4773 PetscFunctionBegin; 4774 /* TODO: add missing checks */ 4775 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4776 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4777 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4778 PetscValidLogicalCollectiveInt(mat,nis,7); 4779 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4780 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4781 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4782 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4783 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4784 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4785 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4786 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4787 PetscInt mrows,mcols,mnrows,mncols; 4788 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4789 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4790 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4791 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4792 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4793 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4794 } 4795 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4796 PetscValidLogicalCollectiveInt(mat,bs,0); 4797 /* prepare IS for sending if not provided */ 4798 if (!is_sends) { 4799 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4800 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4801 } else { 4802 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4803 is_sends_internal = is_sends; 4804 } 4805 4806 /* get comm */ 4807 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4808 4809 /* compute number of sends */ 4810 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4811 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4812 4813 /* compute number of receives */ 4814 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4815 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4816 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4817 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4818 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4819 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4820 ierr = PetscFree(iflags);CHKERRQ(ierr); 4821 4822 /* restrict comm if requested */ 4823 subcomm = 0; 4824 destroy_mat = PETSC_FALSE; 4825 if (restrict_comm) { 4826 PetscMPIInt color,subcommsize; 4827 4828 color = 0; 4829 if (restrict_full) { 4830 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4831 } else { 4832 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4833 } 4834 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4835 subcommsize = commsize - subcommsize; 4836 /* check if reuse has been requested */ 4837 if (reuse == MAT_REUSE_MATRIX) { 4838 if (*mat_n) { 4839 PetscMPIInt subcommsize2; 4840 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4841 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4842 comm_n = PetscObjectComm((PetscObject)*mat_n); 4843 } else { 4844 comm_n = PETSC_COMM_SELF; 4845 } 4846 } else { /* MAT_INITIAL_MATRIX */ 4847 PetscMPIInt rank; 4848 4849 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4850 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4851 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4852 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4853 comm_n = PetscSubcommChild(subcomm); 4854 } 4855 /* flag to destroy *mat_n if not significative */ 4856 if (color) destroy_mat = PETSC_TRUE; 4857 } else { 4858 comm_n = comm; 4859 } 4860 4861 /* prepare send/receive buffers */ 4862 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4863 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4864 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4865 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4866 if (nis) { 4867 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4868 } 4869 4870 /* Get data from local matrices */ 4871 if (!isdense) { 4872 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4873 /* TODO: See below some guidelines on how to prepare the local buffers */ 4874 /* 4875 send_buffer_vals should contain the raw values of the local matrix 4876 send_buffer_idxs should contain: 4877 - MatType_PRIVATE type 4878 - PetscInt size_of_l2gmap 4879 - PetscInt global_row_indices[size_of_l2gmap] 4880 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4881 */ 4882 } else { 4883 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4884 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4885 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4886 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4887 send_buffer_idxs[1] = i; 4888 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4889 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4890 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4891 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4892 for (i=0;i<n_sends;i++) { 4893 ilengths_vals[is_indices[i]] = len*len; 4894 ilengths_idxs[is_indices[i]] = len+2; 4895 } 4896 } 4897 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4898 /* additional is (if any) */ 4899 if (nis) { 4900 PetscMPIInt psum; 4901 PetscInt j; 4902 for (j=0,psum=0;j<nis;j++) { 4903 PetscInt plen; 4904 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4905 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4906 psum += len+1; /* indices + lenght */ 4907 } 4908 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4909 for (j=0,psum=0;j<nis;j++) { 4910 PetscInt plen; 4911 const PetscInt *is_array_idxs; 4912 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4913 send_buffer_idxs_is[psum] = plen; 4914 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4915 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4916 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4917 psum += plen+1; /* indices + lenght */ 4918 } 4919 for (i=0;i<n_sends;i++) { 4920 ilengths_idxs_is[is_indices[i]] = psum; 4921 } 4922 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4923 } 4924 4925 buf_size_idxs = 0; 4926 buf_size_vals = 0; 4927 buf_size_idxs_is = 0; 4928 for (i=0;i<n_recvs;i++) { 4929 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4930 buf_size_vals += (PetscInt)olengths_vals[i]; 4931 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4932 } 4933 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4935 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4936 4937 /* get new tags for clean communications */ 4938 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4939 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4940 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4941 4942 /* allocate for requests */ 4943 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4944 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4945 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4946 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4947 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4948 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4949 4950 /* communications */ 4951 ptr_idxs = recv_buffer_idxs; 4952 ptr_vals = recv_buffer_vals; 4953 ptr_idxs_is = recv_buffer_idxs_is; 4954 for (i=0;i<n_recvs;i++) { 4955 source_dest = onodes[i]; 4956 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4957 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4958 ptr_idxs += olengths_idxs[i]; 4959 ptr_vals += olengths_vals[i]; 4960 if (nis) { 4961 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4962 ptr_idxs_is += olengths_idxs_is[i]; 4963 } 4964 } 4965 for (i=0;i<n_sends;i++) { 4966 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4967 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4968 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4969 if (nis) { 4970 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4971 } 4972 } 4973 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4974 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4975 4976 /* assemble new l2g map */ 4977 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4978 ptr_idxs = recv_buffer_idxs; 4979 new_local_rows = 0; 4980 for (i=0;i<n_recvs;i++) { 4981 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4982 ptr_idxs += olengths_idxs[i]; 4983 } 4984 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4985 ptr_idxs = recv_buffer_idxs; 4986 new_local_rows = 0; 4987 for (i=0;i<n_recvs;i++) { 4988 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4989 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4990 ptr_idxs += olengths_idxs[i]; 4991 } 4992 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4993 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4994 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4995 4996 /* infer new local matrix type from received local matrices type */ 4997 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4998 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4999 if (n_recvs) { 5000 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5001 ptr_idxs = recv_buffer_idxs; 5002 for (i=0;i<n_recvs;i++) { 5003 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5004 new_local_type_private = MATAIJ_PRIVATE; 5005 break; 5006 } 5007 ptr_idxs += olengths_idxs[i]; 5008 } 5009 switch (new_local_type_private) { 5010 case MATDENSE_PRIVATE: 5011 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5012 new_local_type = MATSEQAIJ; 5013 bs = 1; 5014 } else { /* if I receive only 1 dense matrix */ 5015 new_local_type = MATSEQDENSE; 5016 bs = 1; 5017 } 5018 break; 5019 case MATAIJ_PRIVATE: 5020 new_local_type = MATSEQAIJ; 5021 bs = 1; 5022 break; 5023 case MATBAIJ_PRIVATE: 5024 new_local_type = MATSEQBAIJ; 5025 break; 5026 case MATSBAIJ_PRIVATE: 5027 new_local_type = MATSEQSBAIJ; 5028 break; 5029 default: 5030 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5031 break; 5032 } 5033 } else { /* by default, new_local_type is seqdense */ 5034 new_local_type = MATSEQDENSE; 5035 bs = 1; 5036 } 5037 5038 /* create MATIS object if needed */ 5039 if (reuse == MAT_INITIAL_MATRIX) { 5040 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5041 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5042 } else { 5043 /* it also destroys the local matrices */ 5044 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5045 } 5046 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5047 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5048 5049 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5050 5051 /* Global to local map of received indices */ 5052 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5053 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5054 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5055 5056 /* restore attributes -> type of incoming data and its size */ 5057 buf_size_idxs = 0; 5058 for (i=0;i<n_recvs;i++) { 5059 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5060 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5061 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5062 } 5063 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5064 5065 /* set preallocation */ 5066 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5067 if (!newisdense) { 5068 PetscInt *new_local_nnz=0; 5069 5070 ptr_vals = recv_buffer_vals; 5071 ptr_idxs = recv_buffer_idxs_local; 5072 if (n_recvs) { 5073 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5074 } 5075 for (i=0;i<n_recvs;i++) { 5076 PetscInt j; 5077 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5078 for (j=0;j<*(ptr_idxs+1);j++) { 5079 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5080 } 5081 } else { 5082 /* TODO */ 5083 } 5084 ptr_idxs += olengths_idxs[i]; 5085 } 5086 if (new_local_nnz) { 5087 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5088 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5089 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5090 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5091 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5092 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5093 } else { 5094 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5095 } 5096 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5097 } else { 5098 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5099 } 5100 5101 /* set values */ 5102 ptr_vals = recv_buffer_vals; 5103 ptr_idxs = recv_buffer_idxs_local; 5104 for (i=0;i<n_recvs;i++) { 5105 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5106 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5107 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5108 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5109 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5110 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5111 } else { 5112 /* TODO */ 5113 } 5114 ptr_idxs += olengths_idxs[i]; 5115 ptr_vals += olengths_vals[i]; 5116 } 5117 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5118 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5119 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5120 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5121 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5122 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5123 5124 #if 0 5125 if (!restrict_comm) { /* check */ 5126 Vec lvec,rvec; 5127 PetscReal infty_error; 5128 5129 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5130 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5131 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5132 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5133 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5134 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5135 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5136 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5137 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5138 } 5139 #endif 5140 5141 /* assemble new additional is (if any) */ 5142 if (nis) { 5143 PetscInt **temp_idxs,*count_is,j,psum; 5144 5145 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5146 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5147 ptr_idxs = recv_buffer_idxs_is; 5148 psum = 0; 5149 for (i=0;i<n_recvs;i++) { 5150 for (j=0;j<nis;j++) { 5151 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5152 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5153 psum += plen; 5154 ptr_idxs += plen+1; /* shift pointer to received data */ 5155 } 5156 } 5157 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5158 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5159 for (i=1;i<nis;i++) { 5160 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5161 } 5162 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5163 ptr_idxs = recv_buffer_idxs_is; 5164 for (i=0;i<n_recvs;i++) { 5165 for (j=0;j<nis;j++) { 5166 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5167 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5168 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5169 ptr_idxs += plen+1; /* shift pointer to received data */ 5170 } 5171 } 5172 for (i=0;i<nis;i++) { 5173 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5174 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5175 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5176 } 5177 ierr = PetscFree(count_is);CHKERRQ(ierr); 5178 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5179 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5180 } 5181 /* free workspace */ 5182 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5183 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5184 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5185 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5186 if (isdense) { 5187 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5188 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5189 } else { 5190 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5191 } 5192 if (nis) { 5193 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5194 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5195 } 5196 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5197 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5198 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5199 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5200 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5201 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5202 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5203 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5204 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5205 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5206 ierr = PetscFree(onodes);CHKERRQ(ierr); 5207 if (nis) { 5208 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5209 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5210 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5211 } 5212 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5213 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5214 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5215 for (i=0;i<nis;i++) { 5216 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5217 } 5218 *mat_n = NULL; 5219 } 5220 PetscFunctionReturn(0); 5221 } 5222 5223 /* temporary hack into ksp private data structure */ 5224 #include <petsc/private/kspimpl.h> 5225 5226 #undef __FUNCT__ 5227 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5228 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5229 { 5230 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5231 PC_IS *pcis = (PC_IS*)pc->data; 5232 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5233 MatNullSpace CoarseNullSpace=NULL; 5234 ISLocalToGlobalMapping coarse_islg; 5235 IS coarse_is,*isarray; 5236 PetscInt i,im_active=-1,active_procs=-1; 5237 PetscInt nis,nisdofs,nisneu,nisvert; 5238 PC pc_temp; 5239 PCType coarse_pc_type; 5240 KSPType coarse_ksp_type; 5241 PetscBool multilevel_requested,multilevel_allowed; 5242 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5243 Mat t_coarse_mat_is; 5244 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5245 PetscMPIInt all_procs; 5246 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5247 PetscBool compute_vecs = PETSC_FALSE; 5248 PetscScalar *array; 5249 PetscErrorCode ierr; 5250 5251 PetscFunctionBegin; 5252 /* Assign global numbering to coarse dofs */ 5253 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5254 PetscInt ocoarse_size; 5255 compute_vecs = PETSC_TRUE; 5256 ocoarse_size = pcbddc->coarse_size; 5257 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5258 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5259 /* see if we can avoid some work */ 5260 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5261 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5262 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5263 PC pc; 5264 PetscBool isbddc; 5265 5266 /* temporary workaround since PCBDDC does not have a reset method so far */ 5267 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5268 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5269 if (isbddc) { 5270 ierr = PCDestroy(&pc);CHKERRQ(ierr); 5271 } 5272 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5273 coarse_reuse = PETSC_FALSE; 5274 } else { /* we can safely reuse already computed coarse matrix */ 5275 coarse_reuse = PETSC_TRUE; 5276 } 5277 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5278 coarse_reuse = PETSC_FALSE; 5279 } 5280 /* reset any subassembling information */ 5281 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5282 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5283 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5284 coarse_reuse = PETSC_TRUE; 5285 } 5286 5287 /* count "active" (i.e. with positive local size) and "void" processes */ 5288 im_active = !!(pcis->n); 5289 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5290 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5291 void_procs = all_procs-active_procs; 5292 csin_type_simple = PETSC_TRUE; 5293 redist = PETSC_FALSE; 5294 if (pcbddc->current_level && void_procs) { 5295 csin_ml = PETSC_TRUE; 5296 ncoarse_ml = void_procs; 5297 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5298 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5299 csin_ds = PETSC_TRUE; 5300 ncoarse_ds = pcbddc->redistribute_coarse; 5301 redist = PETSC_TRUE; 5302 } else { 5303 csin_ds = PETSC_TRUE; 5304 ncoarse_ds = active_procs; 5305 redist = PETSC_TRUE; 5306 } 5307 } else { 5308 csin_ml = PETSC_FALSE; 5309 ncoarse_ml = all_procs; 5310 if (void_procs) { 5311 csin_ds = PETSC_TRUE; 5312 ncoarse_ds = void_procs; 5313 csin_type_simple = PETSC_FALSE; 5314 } else { 5315 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5316 csin_ds = PETSC_TRUE; 5317 ncoarse_ds = pcbddc->redistribute_coarse; 5318 redist = PETSC_TRUE; 5319 } else { 5320 csin_ds = PETSC_FALSE; 5321 ncoarse_ds = all_procs; 5322 } 5323 } 5324 } 5325 5326 /* 5327 test if we can go multilevel: three conditions must be satisfied: 5328 - we have not exceeded the number of levels requested 5329 - we can actually subassemble the active processes 5330 - we can find a suitable number of MPI processes where we can place the subassembled problem 5331 */ 5332 multilevel_allowed = PETSC_FALSE; 5333 multilevel_requested = PETSC_FALSE; 5334 if (pcbddc->current_level < pcbddc->max_levels) { 5335 multilevel_requested = PETSC_TRUE; 5336 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5337 multilevel_allowed = PETSC_FALSE; 5338 } else { 5339 multilevel_allowed = PETSC_TRUE; 5340 } 5341 } 5342 /* determine number of process partecipating to coarse solver */ 5343 if (multilevel_allowed) { 5344 ncoarse = ncoarse_ml; 5345 csin = csin_ml; 5346 redist = PETSC_FALSE; 5347 } else { 5348 ncoarse = ncoarse_ds; 5349 csin = csin_ds; 5350 } 5351 5352 /* creates temporary l2gmap and IS for coarse indexes */ 5353 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5354 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5355 5356 /* creates temporary MATIS object for coarse matrix */ 5357 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5358 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5359 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5360 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5361 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5362 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5363 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5364 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5365 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5366 5367 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5368 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5369 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5370 const PetscInt *idxs; 5371 ISLocalToGlobalMapping tmap; 5372 5373 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5374 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5375 /* allocate space for temporary storage */ 5376 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5377 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5378 /* allocate for IS array */ 5379 nisdofs = pcbddc->n_ISForDofsLocal; 5380 nisneu = !!pcbddc->NeumannBoundariesLocal; 5381 nisvert = 0; 5382 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5383 nisvert = 1; 5384 } 5385 nis = nisdofs + nisneu + nisvert; 5386 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5387 /* dofs splitting */ 5388 for (i=0;i<nisdofs;i++) { 5389 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5390 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5391 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5392 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5393 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5394 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5396 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5397 } 5398 /* neumann boundaries */ 5399 if (pcbddc->NeumannBoundariesLocal) { 5400 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5401 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5402 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5403 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5404 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5405 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5406 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5407 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5408 } 5409 /* primal vertices (benign) */ 5410 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5411 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5412 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5413 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5414 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5415 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5416 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5417 } 5418 /* free memory */ 5419 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5420 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5421 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5422 } else { 5423 nis = 0; 5424 nisdofs = 0; 5425 nisneu = 0; 5426 nisvert = 0; 5427 isarray = NULL; 5428 } 5429 /* destroy no longer needed map */ 5430 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5431 5432 /* restrict on coarse candidates (if needed) */ 5433 coarse_mat_is = NULL; 5434 if (csin) { 5435 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5436 if (redist) { 5437 PetscMPIInt rank; 5438 PetscInt spc,n_spc_p1,dest[1],destsize; 5439 5440 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5441 spc = active_procs/ncoarse; 5442 n_spc_p1 = active_procs%ncoarse; 5443 if (im_active) { 5444 destsize = 1; 5445 if (rank > n_spc_p1*(spc+1)-1) { 5446 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5447 } else { 5448 dest[0] = rank/(spc+1); 5449 } 5450 } else { 5451 destsize = 0; 5452 } 5453 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5454 } else if (csin_type_simple) { 5455 PetscMPIInt rank; 5456 PetscInt issize,isidx; 5457 5458 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5459 if (im_active) { 5460 issize = 1; 5461 isidx = (PetscInt)rank; 5462 } else { 5463 issize = 0; 5464 isidx = -1; 5465 } 5466 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5467 } else { /* get a suitable subassembling pattern from MATIS code */ 5468 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5469 } 5470 5471 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5472 if (!redist || ncoarse <= void_procs) { 5473 PetscInt ncoarse_cand,tissize,*nisindices; 5474 PetscInt *coarse_candidates; 5475 const PetscInt* tisindices; 5476 5477 /* get coarse candidates' ranks in pc communicator */ 5478 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5479 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5480 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5481 if (!coarse_candidates[i]) { 5482 coarse_candidates[ncoarse_cand++]=i; 5483 } 5484 } 5485 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5486 5487 5488 if (pcbddc->dbg_flag) { 5489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5491 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5492 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5493 for (i=0;i<ncoarse_cand;i++) { 5494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5495 } 5496 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5498 } 5499 /* shift the pattern on coarse candidates */ 5500 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5501 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5502 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5503 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5504 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5505 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5506 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5507 } 5508 if (pcbddc->dbg_flag) { 5509 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5510 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5511 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5512 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5513 } 5514 } 5515 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5516 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5517 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5518 } else { /* this is the last level, so use just receiving processes in subcomm */ 5519 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5520 } 5521 } else { 5522 if (pcbddc->dbg_flag) { 5523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5524 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5526 } 5527 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5528 coarse_mat_is = t_coarse_mat_is; 5529 } 5530 5531 /* create local to global scatters for coarse problem */ 5532 if (compute_vecs) { 5533 PetscInt lrows; 5534 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5535 if (coarse_mat_is) { 5536 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5537 } else { 5538 lrows = 0; 5539 } 5540 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5541 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5542 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5543 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5544 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5545 } 5546 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5547 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5548 5549 /* set defaults for coarse KSP and PC */ 5550 if (multilevel_allowed) { 5551 coarse_ksp_type = KSPRICHARDSON; 5552 coarse_pc_type = PCBDDC; 5553 } else { 5554 coarse_ksp_type = KSPPREONLY; 5555 coarse_pc_type = PCREDUNDANT; 5556 } 5557 5558 /* print some info if requested */ 5559 if (pcbddc->dbg_flag) { 5560 if (!multilevel_allowed) { 5561 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5562 if (multilevel_requested) { 5563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5564 } else if (pcbddc->max_levels) { 5565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5566 } 5567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5568 } 5569 } 5570 5571 /* create the coarse KSP object only once with defaults */ 5572 if (coarse_mat_is) { 5573 MatReuse coarse_mat_reuse; 5574 PetscViewer dbg_viewer = NULL; 5575 if (pcbddc->dbg_flag) { 5576 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5577 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5578 } 5579 if (!pcbddc->coarse_ksp) { 5580 char prefix[256],str_level[16]; 5581 size_t len; 5582 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5583 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5584 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5585 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5586 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5587 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5588 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5589 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5590 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5591 /* prefix */ 5592 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5593 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5594 if (!pcbddc->current_level) { 5595 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5596 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5597 } else { 5598 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5599 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5600 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5601 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5602 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5603 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5604 } 5605 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5606 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5607 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5608 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5609 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5610 /* allow user customization */ 5611 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5612 } 5613 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5614 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5615 if (nisdofs) { 5616 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5617 for (i=0;i<nisdofs;i++) { 5618 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5619 } 5620 } 5621 if (nisneu) { 5622 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5623 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5624 } 5625 if (nisvert) { 5626 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5627 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5628 } 5629 5630 /* get some info after set from options */ 5631 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5632 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5633 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5634 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5635 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5636 isbddc = PETSC_FALSE; 5637 } 5638 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5639 if (isredundant) { 5640 KSP inner_ksp; 5641 PC inner_pc; 5642 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5643 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5644 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5645 } 5646 5647 /* assemble coarse matrix */ 5648 if (coarse_reuse) { 5649 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5650 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5651 coarse_mat_reuse = MAT_REUSE_MATRIX; 5652 } else { 5653 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5654 } 5655 if (isbddc || isnn) { 5656 if (pcbddc->coarsening_ratio > 1) { 5657 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5658 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5659 if (pcbddc->dbg_flag) { 5660 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5661 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5662 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5663 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5664 } 5665 } 5666 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5667 } else { 5668 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5669 coarse_mat = coarse_mat_is; 5670 } 5671 } else { 5672 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5673 } 5674 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5675 5676 /* propagate symmetry info of coarse matrix */ 5677 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5678 if (pc->pmat->symmetric_set) { 5679 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5680 } 5681 if (pc->pmat->hermitian_set) { 5682 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5683 } 5684 if (pc->pmat->spd_set) { 5685 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5686 } 5687 /* set operators */ 5688 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5689 if (pcbddc->dbg_flag) { 5690 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5691 } 5692 } else { /* processes non partecipating to coarse solver (if any) */ 5693 coarse_mat = 0; 5694 } 5695 ierr = PetscFree(isarray);CHKERRQ(ierr); 5696 #if 0 5697 { 5698 PetscViewer viewer; 5699 char filename[256]; 5700 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5701 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5702 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5703 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5704 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5705 } 5706 #endif 5707 5708 /* Compute coarse null space (special handling by BDDC only) */ 5709 #if 0 5710 if (pcbddc->NullSpace) { 5711 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5712 } 5713 #endif 5714 /* hack */ 5715 if (pcbddc->coarse_ksp) { 5716 Vec crhs,csol; 5717 5718 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5719 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5720 if (!csol) { 5721 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5722 } 5723 if (!crhs) { 5724 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5725 } 5726 } 5727 5728 /* compute null space for coarse solver if the benign trick has been requested */ 5729 if (pcbddc->benign_null) { 5730 5731 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5732 for (i=0;i<pcbddc->benign_n;i++) { 5733 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5734 } 5735 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5736 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5737 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5738 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5739 if (coarse_mat) { 5740 Vec nullv; 5741 PetscScalar *array,*array2; 5742 PetscInt nl; 5743 5744 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5745 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5746 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5747 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5748 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5749 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5750 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5751 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5752 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5753 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5754 } 5755 } 5756 5757 if (pcbddc->coarse_ksp) { 5758 PetscBool ispreonly; 5759 5760 if (CoarseNullSpace) { 5761 PetscBool isnull; 5762 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5763 if (isnull) { 5764 if (isbddc && !pcbddc->benign_saddle_point) { 5765 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5766 } else { 5767 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5768 } 5769 } else { 5770 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5771 } 5772 } 5773 /* setup coarse ksp */ 5774 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5775 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5776 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5777 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5778 KSP check_ksp; 5779 KSPType check_ksp_type; 5780 PC check_pc; 5781 Vec check_vec,coarse_vec; 5782 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5783 PetscInt its; 5784 PetscBool compute_eigs; 5785 PetscReal *eigs_r,*eigs_c; 5786 PetscInt neigs; 5787 const char *prefix; 5788 5789 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5790 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5791 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5792 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5793 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5794 if (ispreonly) { 5795 check_ksp_type = KSPPREONLY; 5796 compute_eigs = PETSC_FALSE; 5797 } else { 5798 check_ksp_type = KSPGMRES; 5799 compute_eigs = PETSC_TRUE; 5800 } 5801 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5802 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5803 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5804 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5805 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5806 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5807 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5808 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5809 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5810 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5811 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5812 /* create random vec */ 5813 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5814 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5815 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5816 if (CoarseNullSpace) { 5817 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5818 } 5819 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5820 /* solve coarse problem */ 5821 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5822 if (CoarseNullSpace) { 5823 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5824 } 5825 /* set eigenvalue estimation if preonly has not been requested */ 5826 if (compute_eigs) { 5827 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5828 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5829 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5830 lambda_max = eigs_r[neigs-1]; 5831 lambda_min = eigs_r[0]; 5832 if (pcbddc->use_coarse_estimates) { 5833 if (lambda_max>lambda_min) { 5834 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5835 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5836 } 5837 } 5838 } 5839 5840 /* check coarse problem residual error */ 5841 if (pcbddc->dbg_flag) { 5842 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5843 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5844 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5845 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5846 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5847 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5848 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5849 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5850 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5851 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5852 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5853 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5854 if (CoarseNullSpace) { 5855 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5856 } 5857 if (compute_eigs) { 5858 PetscReal lambda_max_s,lambda_min_s; 5859 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5860 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5861 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5863 for (i=0;i<neigs;i++) { 5864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5865 } 5866 } 5867 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5868 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5869 } 5870 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5871 if (compute_eigs) { 5872 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5873 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5874 } 5875 } 5876 } 5877 /* print additional info */ 5878 if (pcbddc->dbg_flag) { 5879 /* waits until all processes reaches this point */ 5880 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5881 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5883 } 5884 5885 /* free memory */ 5886 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5887 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5888 PetscFunctionReturn(0); 5889 } 5890 5891 #undef __FUNCT__ 5892 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5893 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5894 { 5895 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5896 PC_IS* pcis = (PC_IS*)pc->data; 5897 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5898 IS subset,subset_mult,subset_n; 5899 PetscInt local_size,coarse_size=0; 5900 PetscInt *local_primal_indices=NULL; 5901 const PetscInt *t_local_primal_indices; 5902 PetscErrorCode ierr; 5903 5904 PetscFunctionBegin; 5905 /* Compute global number of coarse dofs */ 5906 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5907 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5908 } 5909 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5910 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5911 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5912 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5913 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5914 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5915 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5916 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5917 if (local_size != pcbddc->local_primal_size) { 5918 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5919 } 5920 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5921 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5922 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5923 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5924 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5925 5926 /* check numbering */ 5927 if (pcbddc->dbg_flag) { 5928 PetscScalar coarsesum,*array,*array2; 5929 PetscInt i; 5930 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5931 5932 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5933 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5934 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5935 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5936 /* counter */ 5937 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5938 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5939 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5940 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5941 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5942 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5943 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5944 for (i=0;i<pcbddc->local_primal_size;i++) { 5945 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5946 } 5947 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5948 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5949 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5950 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5951 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5954 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5955 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5956 for (i=0;i<pcis->n;i++) { 5957 if (array[i] != 0.0 && array[i] != array2[i]) { 5958 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5959 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5960 set_error = PETSC_TRUE; 5961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5962 } 5963 } 5964 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5965 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5967 for (i=0;i<pcis->n;i++) { 5968 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5969 } 5970 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5971 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5972 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5973 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5974 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5976 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5977 PetscInt *gidxs; 5978 5979 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5980 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5983 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5984 for (i=0;i<pcbddc->local_primal_size;i++) { 5985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5986 } 5987 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5988 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5989 } 5990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5992 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5993 } 5994 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5995 /* get back data */ 5996 *coarse_size_n = coarse_size; 5997 *local_primal_indices_n = local_primal_indices; 5998 PetscFunctionReturn(0); 5999 } 6000 6001 #undef __FUNCT__ 6002 #define __FUNCT__ "PCBDDCGlobalToLocal" 6003 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6004 { 6005 IS localis_t; 6006 PetscInt i,lsize,*idxs,n; 6007 PetscScalar *vals; 6008 PetscErrorCode ierr; 6009 6010 PetscFunctionBegin; 6011 /* get indices in local ordering exploiting local to global map */ 6012 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6013 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6014 for (i=0;i<lsize;i++) vals[i] = 1.0; 6015 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6016 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6017 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6018 if (idxs) { /* multilevel guard */ 6019 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6020 } 6021 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6022 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6023 ierr = PetscFree(vals);CHKERRQ(ierr); 6024 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6025 /* now compute set in local ordering */ 6026 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6027 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6028 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6029 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6030 for (i=0,lsize=0;i<n;i++) { 6031 if (PetscRealPart(vals[i]) > 0.5) { 6032 lsize++; 6033 } 6034 } 6035 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6036 for (i=0,lsize=0;i<n;i++) { 6037 if (PetscRealPart(vals[i]) > 0.5) { 6038 idxs[lsize++] = i; 6039 } 6040 } 6041 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6042 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6043 *localis = localis_t; 6044 PetscFunctionReturn(0); 6045 } 6046 6047 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6048 #undef __FUNCT__ 6049 #define __FUNCT__ "PCBDDCMatMult_Private" 6050 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6051 { 6052 PCBDDCChange_ctx change_ctx; 6053 PetscErrorCode ierr; 6054 6055 PetscFunctionBegin; 6056 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6057 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6058 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6059 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6060 PetscFunctionReturn(0); 6061 } 6062 6063 #undef __FUNCT__ 6064 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6065 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6066 { 6067 PCBDDCChange_ctx change_ctx; 6068 PetscErrorCode ierr; 6069 6070 PetscFunctionBegin; 6071 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6072 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6073 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6074 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6075 PetscFunctionReturn(0); 6076 } 6077 6078 #undef __FUNCT__ 6079 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6080 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6081 { 6082 PC_IS *pcis=(PC_IS*)pc->data; 6083 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6084 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6085 Mat S_j; 6086 PetscInt *used_xadj,*used_adjncy; 6087 PetscBool free_used_adj; 6088 PetscErrorCode ierr; 6089 6090 PetscFunctionBegin; 6091 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6092 free_used_adj = PETSC_FALSE; 6093 if (pcbddc->sub_schurs_layers == -1) { 6094 used_xadj = NULL; 6095 used_adjncy = NULL; 6096 } else { 6097 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6098 used_xadj = pcbddc->mat_graph->xadj; 6099 used_adjncy = pcbddc->mat_graph->adjncy; 6100 } else if (pcbddc->computed_rowadj) { 6101 used_xadj = pcbddc->mat_graph->xadj; 6102 used_adjncy = pcbddc->mat_graph->adjncy; 6103 } else { 6104 PetscBool flg_row=PETSC_FALSE; 6105 const PetscInt *xadj,*adjncy; 6106 PetscInt nvtxs; 6107 6108 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6109 if (flg_row) { 6110 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6111 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6112 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6113 free_used_adj = PETSC_TRUE; 6114 } else { 6115 pcbddc->sub_schurs_layers = -1; 6116 used_xadj = NULL; 6117 used_adjncy = NULL; 6118 } 6119 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6120 } 6121 } 6122 6123 /* setup sub_schurs data */ 6124 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6125 if (!sub_schurs->schur_explicit) { 6126 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6127 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6128 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6129 } else { 6130 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6131 PetscBool isseqaij; 6132 PetscInt benign_n; 6133 6134 if (!pcbddc->use_vertices && reuse_solvers) { 6135 PetscInt n_vertices; 6136 6137 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6138 reuse_solvers = (PetscBool)!n_vertices; 6139 } 6140 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6141 if (!isseqaij) { 6142 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6143 if (matis->A == pcbddc->local_mat) { 6144 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6145 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6146 } else { 6147 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6148 } 6149 } 6150 if (!pcbddc->benign_change_explicit) { 6151 benign_n = pcbddc->benign_n; 6152 } else { 6153 benign_n = 0; 6154 } 6155 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6156 } 6157 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6158 6159 /* free adjacency */ 6160 if (free_used_adj) { 6161 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6162 } 6163 PetscFunctionReturn(0); 6164 } 6165 6166 #undef __FUNCT__ 6167 #define __FUNCT__ "PCBDDCInitSubSchurs" 6168 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6169 { 6170 PC_IS *pcis=(PC_IS*)pc->data; 6171 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6172 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6173 PCBDDCGraph graph; 6174 PetscErrorCode ierr; 6175 6176 PetscFunctionBegin; 6177 /* attach interface graph for determining subsets */ 6178 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6179 IS verticesIS,verticescomm; 6180 PetscInt vsize,*idxs; 6181 6182 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6183 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6184 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6185 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6186 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6187 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6188 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6189 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6190 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6191 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6192 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6193 /* 6194 if (pcbddc->dbg_flag) { 6195 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6196 } 6197 */ 6198 } else { 6199 graph = pcbddc->mat_graph; 6200 } 6201 6202 /* sub_schurs init */ 6203 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6204 6205 /* free graph struct */ 6206 if (pcbddc->sub_schurs_rebuild) { 6207 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6208 } 6209 PetscFunctionReturn(0); 6210 } 6211 6212 #undef __FUNCT__ 6213 #define __FUNCT__ "PCBDDCCheckOperator" 6214 PetscErrorCode PCBDDCCheckOperator(PC pc) 6215 { 6216 PC_IS *pcis=(PC_IS*)pc->data; 6217 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6218 PetscErrorCode ierr; 6219 6220 PetscFunctionBegin; 6221 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6222 IS zerodiag = NULL; 6223 Mat S_j,B0_B=NULL; 6224 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6225 PetscScalar *p0_check,*array,*array2; 6226 PetscReal norm; 6227 PetscInt i; 6228 6229 /* B0 and B0_B */ 6230 if (zerodiag) { 6231 IS dummy; 6232 6233 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6234 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6235 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6236 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6237 } 6238 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6239 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6240 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6241 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6242 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6243 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6244 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6245 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6246 /* S_j */ 6247 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6248 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6249 6250 /* mimic vector in \widetilde{W}_\Gamma */ 6251 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6252 /* continuous in primal space */ 6253 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6254 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6255 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6256 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6257 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6258 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6259 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6260 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6261 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6262 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6263 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6264 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6265 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6266 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6267 6268 /* assemble rhs for coarse problem */ 6269 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6270 /* local with Schur */ 6271 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6272 if (zerodiag) { 6273 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6274 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6275 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6276 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6277 } 6278 /* sum on primal nodes the local contributions */ 6279 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6280 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6281 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6282 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6283 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6284 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6285 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6286 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6287 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6288 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6289 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6290 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6291 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6292 /* scale primal nodes (BDDC sums contibutions) */ 6293 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6294 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6295 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6296 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6297 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6298 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6299 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6300 /* global: \widetilde{B0}_B w_\Gamma */ 6301 if (zerodiag) { 6302 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6303 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6304 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6305 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6306 } 6307 /* BDDC */ 6308 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6309 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6310 6311 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6312 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6313 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6314 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6315 for (i=0;i<pcbddc->benign_n;i++) { 6316 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6317 } 6318 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6319 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6320 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6321 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6322 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6323 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6324 } 6325 PetscFunctionReturn(0); 6326 } 6327