1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC"); 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 195 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 196 /* print debug info TODO: to be removed */ 197 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 198 PetscOptionsEnd(); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 202 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 203 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 204 if (!singular) { 205 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 214 PetscCall(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT,field,pcbddc->n_ISForDofsLocal); 220 if (pcbddc->n_ISForDofsLocal && field >= 0) { 221 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 222 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 223 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 224 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 225 ne = n; 226 nedfieldlocal = NULL; 227 global = PETSC_TRUE; 228 } else if (field == PETSC_DECIDE) { 229 PetscInt rst,ren,*idx; 230 231 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 232 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 233 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 234 for (i=rst;i<ren;i++) { 235 PetscInt nc; 236 237 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 238 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 239 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 240 } 241 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 242 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscMalloc1(n,&idx)); 244 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 245 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 246 } else { 247 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 248 } 249 250 /* Sanity checks */ 251 PetscCheck(order || conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 252 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 253 PetscCheck(!order || (ne%order == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT,ne,order); 254 255 /* Just set primal dofs and return */ 256 if (setprimal) { 257 IS enedfieldlocal; 258 PetscInt *eidxs; 259 260 PetscCall(PetscMalloc1(ne,&eidxs)); 261 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 262 if (nedfieldlocal) { 263 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 264 for (i=0,cum=0;i<ne;i++) { 265 if (PetscRealPart(vals[idxs[i]]) > 2.) { 266 eidxs[cum++] = idxs[i]; 267 } 268 } 269 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 270 } else { 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[i]) > 2.) { 273 eidxs[cum++] = i; 274 } 275 } 276 } 277 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 278 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 279 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 280 PetscCall(PetscFree(eidxs)); 281 PetscCall(ISDestroy(&nedfieldlocal)); 282 PetscCall(ISDestroy(&enedfieldlocal)); 283 PetscFunctionReturn(0); 284 } 285 286 /* Compute some l2g maps */ 287 if (nedfieldlocal) { 288 IS is; 289 290 /* need to map from the local Nedelec field to local numbering */ 291 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 292 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 293 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 295 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 296 if (global) { 297 PetscCall(PetscObjectReference((PetscObject)al2g)); 298 el2g = al2g; 299 } else { 300 IS gis; 301 302 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 303 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 304 PetscCall(ISDestroy(&gis)); 305 } 306 PetscCall(ISDestroy(&is)); 307 } else { 308 /* restore default */ 309 pcbddc->nedfield = -1; 310 /* one ref for the destruction of al2g, one for el2g */ 311 PetscCall(PetscObjectReference((PetscObject)al2g)); 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 el2g = al2g; 314 fl2g = NULL; 315 } 316 317 /* Start communication to drop connections for interior edges (for cc analysis only) */ 318 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 319 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 320 if (nedfieldlocal) { 321 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 322 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 323 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 324 } else { 325 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 326 } 327 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 328 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 330 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 331 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 332 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 333 if (global) { 334 PetscInt rst; 335 336 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 337 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 338 if (matis->sf_rootdata[i] < 2) { 339 matis->sf_rootdata[cum++] = i + rst; 340 } 341 } 342 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 343 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 344 } else { 345 PetscInt *tbz; 346 347 PetscCall(PetscMalloc1(ne,&tbz)); 348 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 349 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 351 for (i=0,cum=0;i<ne;i++) 352 if (matis->sf_leafdata[idxs[i]] == 1) 353 tbz[cum++] = i; 354 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 355 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 356 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 357 PetscCall(PetscFree(tbz)); 358 } 359 } else { /* we need the entire G to infer the nullspace */ 360 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 361 G = pcbddc->discretegradient; 362 } 363 364 /* Extract subdomain relevant rows of G */ 365 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 366 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 367 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 368 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 369 PetscCall(ISDestroy(&lned)); 370 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 371 PetscCall(MatDestroy(&lGall)); 372 PetscCall(MatISGetLocalMat(lGis,&lG)); 373 374 /* SF for nodal dofs communications */ 375 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 376 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 377 PetscCall(PetscObjectReference((PetscObject)vl2g)); 378 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 379 PetscCall(PetscSFCreate(comm,&sfv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 381 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 383 i = singular ? 2 : 1; 384 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 385 386 /* Destroy temporary G created in MATIS format and modified G */ 387 PetscCall(PetscObjectReference((PetscObject)lG)); 388 PetscCall(MatDestroy(&lGis)); 389 PetscCall(MatDestroy(&G)); 390 391 if (print) { 392 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 393 PetscCall(MatView(lG,NULL)); 394 } 395 396 /* Save lG for values insertion in change of basis */ 397 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 398 399 /* Analyze the edge-nodes connections (duplicate lG) */ 400 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 401 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 402 PetscCall(PetscBTCreate(nv,&btv)); 403 PetscCall(PetscBTCreate(ne,&bte)); 404 PetscCall(PetscBTCreate(ne,&btb)); 405 PetscCall(PetscBTCreate(ne,&btbd)); 406 PetscCall(PetscBTCreate(nv,&btvcand)); 407 /* need to import the boundary specification to ensure the 408 proper detection of coarse edges' endpoints */ 409 if (pcbddc->DirichletBoundariesLocal) { 410 IS is; 411 412 if (fl2g) { 413 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 414 } else { 415 is = pcbddc->DirichletBoundariesLocal; 416 } 417 PetscCall(ISGetLocalSize(is,&cum)); 418 PetscCall(ISGetIndices(is,&idxs)); 419 for (i=0;i<cum;i++) { 420 if (idxs[i] >= 0) { 421 PetscCall(PetscBTSet(btb,idxs[i])); 422 PetscCall(PetscBTSet(btbd,idxs[i])); 423 } 424 } 425 PetscCall(ISRestoreIndices(is,&idxs)); 426 if (fl2g) { 427 PetscCall(ISDestroy(&is)); 428 } 429 } 430 if (pcbddc->NeumannBoundariesLocal) { 431 IS is; 432 433 if (fl2g) { 434 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 435 } else { 436 is = pcbddc->NeumannBoundariesLocal; 437 } 438 PetscCall(ISGetLocalSize(is,&cum)); 439 PetscCall(ISGetIndices(is,&idxs)); 440 for (i=0;i<cum;i++) { 441 if (idxs[i] >= 0) { 442 PetscCall(PetscBTSet(btb,idxs[i])); 443 } 444 } 445 PetscCall(ISRestoreIndices(is,&idxs)); 446 if (fl2g) { 447 PetscCall(ISDestroy(&is)); 448 } 449 } 450 451 /* Count neighs per dof */ 452 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 454 455 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 456 for proper detection of coarse edges' endpoints */ 457 PetscCall(PetscBTCreate(ne,&btee)); 458 for (i=0;i<ne;i++) { 459 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 460 PetscCall(PetscBTSet(btee,i)); 461 } 462 } 463 PetscCall(PetscMalloc1(ne,&marks)); 464 if (!conforming) { 465 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 466 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 467 } 468 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 469 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 470 cum = 0; 471 for (i=0;i<ne;i++) { 472 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 473 if (!PetscBTLookup(btee,i)) { 474 marks[cum++] = i; 475 continue; 476 } 477 /* set badly connected edge dofs as primal */ 478 if (!conforming) { 479 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 480 marks[cum++] = i; 481 PetscCall(PetscBTSet(bte,i)); 482 for (j=ii[i];j<ii[i+1];j++) { 483 PetscCall(PetscBTSet(btv,jj[j])); 484 } 485 } else { 486 /* every edge dofs should be connected trough a certain number of nodal dofs 487 to other edge dofs belonging to coarse edges 488 - at most 2 endpoints 489 - order-1 interior nodal dofs 490 - no undefined nodal dofs (nconn < order) 491 */ 492 PetscInt ends = 0,ints = 0, undef = 0; 493 for (j=ii[i];j<ii[i+1];j++) { 494 PetscInt v = jj[j],k; 495 PetscInt nconn = iit[v+1]-iit[v]; 496 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 497 if (nconn > order) ends++; 498 else if (nconn == order) ints++; 499 else undef++; 500 } 501 if (undef || ends > 2 || ints != order -1) { 502 marks[cum++] = i; 503 PetscCall(PetscBTSet(bte,i)); 504 for (j=ii[i];j<ii[i+1];j++) { 505 PetscCall(PetscBTSet(btv,jj[j])); 506 } 507 } 508 } 509 } 510 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 511 if (!order && ii[i+1] != ii[i]) { 512 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 513 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 514 } 515 } 516 PetscCall(PetscBTDestroy(&btee)); 517 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 518 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 519 if (!conforming) { 520 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 521 PetscCall(MatDestroy(&lGt)); 522 } 523 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 524 525 /* identify splitpoints and corner candidates */ 526 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 527 if (print) { 528 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 529 PetscCall(MatView(lGe,NULL)); 530 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 531 PetscCall(MatView(lGt,NULL)); 532 } 533 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 534 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 535 for (i=0;i<nv;i++) { 536 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 537 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 538 if (!order) { /* variable order */ 539 PetscReal vorder = 0.; 540 541 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 542 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 543 PetscCheck(vorder-test <= PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%" PetscInt_FMT ")",(double)vorder,test); 544 ord = 1; 545 } 546 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 547 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 548 if (PetscBTLookup(btbd,jj[j])) { 549 bdir = PETSC_TRUE; 550 break; 551 } 552 if (vc != ecount[jj[j]]) { 553 sneighs = PETSC_FALSE; 554 } else { 555 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 556 for (k=0;k<vc;k++) { 557 if (vn[k] != en[k]) { 558 sneighs = PETSC_FALSE; 559 break; 560 } 561 } 562 } 563 } 564 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 565 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n",i,PetscBools[!sneighs],PetscBools[test >= 3*ord],PetscBools[bdir]); 566 PetscCall(PetscBTSet(btv,i)); 567 } else if (test == ord) { 568 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 569 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %" PetscInt_FMT "\n",i); 570 PetscCall(PetscBTSet(btv,i)); 571 } else { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %" PetscInt_FMT "\n",i); 573 PetscCall(PetscBTSet(btvcand,i)); 574 } 575 } 576 } 577 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 579 PetscCall(PetscBTDestroy(&btbd)); 580 581 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 582 if (order != 1) { 583 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 584 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 585 for (i=0;i<nv;i++) { 586 if (PetscBTLookup(btvcand,i)) { 587 PetscBool found = PETSC_FALSE; 588 for (j=ii[i];j<ii[i+1] && !found;j++) { 589 PetscInt k,e = jj[j]; 590 if (PetscBTLookup(bte,e)) continue; 591 for (k=iit[e];k<iit[e+1];k++) { 592 PetscInt v = jjt[k]; 593 if (v != i && PetscBTLookup(btvcand,v)) { 594 found = PETSC_TRUE; 595 break; 596 } 597 } 598 } 599 if (!found) { 600 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " CLEARED\n",i); 601 PetscCall(PetscBTClear(btvcand,i)); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %" PetscInt_FMT " ACCEPTED\n",i); 604 } 605 } 606 } 607 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 608 } 609 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 610 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 611 PetscCall(MatDestroy(&lGe)); 612 613 /* Get the local G^T explicitly */ 614 PetscCall(MatDestroy(&lGt)); 615 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 616 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 617 618 /* Mark interior nodal dofs */ 619 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 620 PetscCall(PetscBTCreate(nv,&btvi)); 621 for (i=1;i<n_neigh;i++) { 622 for (j=0;j<n_shared[i];j++) { 623 PetscCall(PetscBTSet(btvi,shared[i][j])); 624 } 625 } 626 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 627 628 /* communicate corners and splitpoints */ 629 PetscCall(PetscMalloc1(nv,&vmarks)); 630 PetscCall(PetscArrayzero(sfvleaves,nv)); 631 PetscCall(PetscArrayzero(sfvroots,Lv)); 632 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 633 634 if (print) { 635 IS tbz; 636 637 cum = 0; 638 for (i=0;i<nv;i++) 639 if (sfvleaves[i]) 640 vmarks[cum++] = i; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 644 PetscCall(ISView(tbz,NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 648 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 649 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 651 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 653 /* Zero rows of lGt corresponding to identified corners 654 and interior nodal dofs */ 655 cum = 0; 656 for (i=0;i<nv;i++) { 657 if (sfvleaves[i]) { 658 vmarks[cum++] = i; 659 PetscCall(PetscBTSet(btv,i)); 660 } 661 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 662 } 663 PetscCall(PetscBTDestroy(&btvi)); 664 if (print) { 665 IS tbz; 666 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 668 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 669 PetscCall(ISView(tbz,NULL)); 670 PetscCall(ISDestroy(&tbz)); 671 } 672 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 673 PetscCall(PetscFree(vmarks)); 674 PetscCall(PetscSFDestroy(&sfv)); 675 PetscCall(PetscFree2(sfvleaves,sfvroots)); 676 677 /* Recompute G */ 678 PetscCall(MatDestroy(&lG)); 679 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 680 if (print) { 681 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 682 PetscCall(MatView(lG,NULL)); 683 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 684 PetscCall(MatView(lGt,NULL)); 685 } 686 687 /* Get primal dofs (if any) */ 688 cum = 0; 689 for (i=0;i<ne;i++) { 690 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 691 } 692 if (fl2g) { 693 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 694 } 695 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 696 if (print) { 697 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 698 PetscCall(ISView(primals,NULL)); 699 } 700 PetscCall(PetscBTDestroy(&bte)); 701 /* TODO: what if the user passed in some of them ? */ 702 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 703 PetscCall(ISDestroy(&primals)); 704 705 /* Compute edge connectivity */ 706 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 707 708 /* Symbolic conn = lG*lGt */ 709 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 710 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 711 PetscCall(MatProductSetAlgorithm(conn,"default")); 712 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 713 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 714 PetscCall(MatProductSetFromOptions(conn)); 715 PetscCall(MatProductSymbolic(conn)); 716 717 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 718 if (fl2g) { 719 PetscBT btf; 720 PetscInt *iia,*jja,*iiu,*jju; 721 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 722 723 /* create CSR for all local dofs */ 724 PetscCall(PetscMalloc1(n+1,&iia)); 725 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 726 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,n); 727 iiu = pcbddc->mat_graph->xadj; 728 jju = pcbddc->mat_graph->adjncy; 729 } else if (pcbddc->use_local_adj) { 730 rest = PETSC_TRUE; 731 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 732 } else { 733 free = PETSC_TRUE; 734 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 735 iiu[0] = 0; 736 for (i=0;i<n;i++) { 737 iiu[i+1] = i+1; 738 jju[i] = -1; 739 } 740 } 741 742 /* import sizes of CSR */ 743 iia[0] = 0; 744 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 745 746 /* overwrite entries corresponding to the Nedelec field */ 747 PetscCall(PetscBTCreate(n,&btf)); 748 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 749 for (i=0;i<ne;i++) { 750 PetscCall(PetscBTSet(btf,idxs[i])); 751 iia[idxs[i]+1] = ii[i+1]-ii[i]; 752 } 753 754 /* iia in CSR */ 755 for (i=0;i<n;i++) iia[i+1] += iia[i]; 756 757 /* jja in CSR */ 758 PetscCall(PetscMalloc1(iia[n],&jja)); 759 for (i=0;i<n;i++) 760 if (!PetscBTLookup(btf,i)) 761 for (j=0;j<iiu[i+1]-iiu[i];j++) 762 jja[iia[i]+j] = jju[iiu[i]+j]; 763 764 /* map edge dofs connectivity */ 765 if (jj) { 766 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 767 for (i=0;i<ne;i++) { 768 PetscInt e = idxs[i]; 769 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 770 } 771 } 772 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 773 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 774 if (rest) { 775 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 776 } 777 if (free) { 778 PetscCall(PetscFree2(iiu,jju)); 779 } 780 PetscCall(PetscBTDestroy(&btf)); 781 } else { 782 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 783 } 784 785 /* Analyze interface for edge dofs */ 786 PetscCall(PCBDDCAnalyzeInterface(pc)); 787 pcbddc->mat_graph->twodim = PETSC_FALSE; 788 789 /* Get coarse edges in the edge space */ 790 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 791 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 792 793 if (fl2g) { 794 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 795 PetscCall(PetscMalloc1(nee,&eedges)); 796 for (i=0;i<nee;i++) { 797 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 798 } 799 } else { 800 eedges = alleedges; 801 primals = allprimals; 802 } 803 804 /* Mark fine edge dofs with their coarse edge id */ 805 PetscCall(PetscArrayzero(marks,ne)); 806 PetscCall(ISGetLocalSize(primals,&cum)); 807 PetscCall(ISGetIndices(primals,&idxs)); 808 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 809 PetscCall(ISRestoreIndices(primals,&idxs)); 810 if (print) { 811 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 812 PetscCall(ISView(primals,NULL)); 813 } 814 815 maxsize = 0; 816 for (i=0;i<nee;i++) { 817 PetscInt size,mark = i+1; 818 819 PetscCall(ISGetLocalSize(eedges[i],&size)); 820 PetscCall(ISGetIndices(eedges[i],&idxs)); 821 for (j=0;j<size;j++) marks[idxs[j]] = mark; 822 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 823 maxsize = PetscMax(maxsize,size); 824 } 825 826 /* Find coarse edge endpoints */ 827 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 828 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 829 for (i=0;i<nee;i++) { 830 PetscInt mark = i+1,size; 831 832 PetscCall(ISGetLocalSize(eedges[i],&size)); 833 if (!size && nedfieldlocal) continue; 834 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 835 PetscCall(ISGetIndices(eedges[i],&idxs)); 836 if (print) { 837 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n",i)); 838 PetscCall(ISView(eedges[i],NULL)); 839 } 840 for (j=0;j<size;j++) { 841 PetscInt k, ee = idxs[j]; 842 if (print) PetscPrintf(PETSC_COMM_SELF," idx %" PetscInt_FMT "\n",ee); 843 for (k=ii[ee];k<ii[ee+1];k++) { 844 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %" PetscInt_FMT "\n",jj[k]); 845 if (PetscBTLookup(btv,jj[k])) { 846 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %" PetscInt_FMT "\n",jj[k]); 847 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 848 PetscInt k2; 849 PetscBool corner = PETSC_FALSE; 850 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 851 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,(int)!!PetscBTLookup(btb,jjt[k2])); 852 /* it's a corner if either is connected with an edge dof belonging to a different cc or 853 if the edge dof lie on the natural part of the boundary */ 854 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 855 corner = PETSC_TRUE; 856 break; 857 } 858 } 859 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 860 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %" PetscInt_FMT "\n",jj[k]); 861 PetscCall(PetscBTSet(btv,jj[k])); 862 } else { 863 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 864 } 865 } 866 } 867 } 868 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 869 } 870 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 871 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 872 PetscCall(PetscBTDestroy(&btb)); 873 874 /* Reset marked primal dofs */ 875 PetscCall(ISGetLocalSize(primals,&cum)); 876 PetscCall(ISGetIndices(primals,&idxs)); 877 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 878 PetscCall(ISRestoreIndices(primals,&idxs)); 879 880 /* Now use the initial lG */ 881 PetscCall(MatDestroy(&lG)); 882 PetscCall(MatDestroy(&lGt)); 883 lG = lGinit; 884 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 885 886 /* Compute extended cols indices */ 887 PetscCall(PetscBTCreate(nv,&btvc)); 888 PetscCall(PetscBTCreate(nee,&bter)); 889 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 890 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 891 i *= maxsize; 892 PetscCall(PetscCalloc1(nee,&extcols)); 893 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 894 eerr = PETSC_FALSE; 895 for (i=0;i<nee;i++) { 896 PetscInt size,found = 0; 897 898 cum = 0; 899 PetscCall(ISGetLocalSize(eedges[i],&size)); 900 if (!size && nedfieldlocal) continue; 901 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 902 PetscCall(ISGetIndices(eedges[i],&idxs)); 903 PetscCall(PetscBTMemzero(nv,btvc)); 904 for (j=0;j<size;j++) { 905 PetscInt k,ee = idxs[j]; 906 for (k=ii[ee];k<ii[ee+1];k++) { 907 PetscInt vv = jj[k]; 908 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 909 else if (!PetscBTLookupSet(btvc,vv)) found++; 910 } 911 } 912 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 913 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 914 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 915 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 916 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 917 /* it may happen that endpoints are not defined at this point 918 if it is the case, mark this edge for a second pass */ 919 if (cum != size -1 || found != 2) { 920 PetscCall(PetscBTSet(bter,i)); 921 if (print) { 922 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 923 PetscCall(ISView(eedges[i],NULL)); 924 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 925 PetscCall(ISView(extcols[i],NULL)); 926 } 927 eerr = PETSC_TRUE; 928 } 929 } 930 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 931 PetscCall(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 932 if (done) { 933 PetscInt *newprimals; 934 935 PetscCall(PetscMalloc1(ne,&newprimals)); 936 PetscCall(ISGetLocalSize(primals,&cum)); 937 PetscCall(ISGetIndices(primals,&idxs)); 938 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 939 PetscCall(ISRestoreIndices(primals,&idxs)); 940 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 941 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %s)\n",PetscBools[eerr]); 942 for (i=0;i<nee;i++) { 943 PetscBool has_candidates = PETSC_FALSE; 944 if (PetscBTLookup(bter,i)) { 945 PetscInt size,mark = i+1; 946 947 PetscCall(ISGetLocalSize(eedges[i],&size)); 948 PetscCall(ISGetIndices(eedges[i],&idxs)); 949 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 950 for (j=0;j<size;j++) { 951 PetscInt k,ee = idxs[j]; 952 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n",ee,ii[ee],ii[ee+1]); 953 for (k=ii[ee];k<ii[ee+1];k++) { 954 /* set all candidates located on the edge as corners */ 955 if (PetscBTLookup(btvcand,jj[k])) { 956 PetscInt k2,vv = jj[k]; 957 has_candidates = PETSC_TRUE; 958 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %" PetscInt_FMT "\n",vv); 959 PetscCall(PetscBTSet(btv,vv)); 960 /* set all edge dofs connected to candidate as primals */ 961 for (k2=iit[vv];k2<iit[vv+1];k2++) { 962 if (marks[jjt[k2]] == mark) { 963 PetscInt k3,ee2 = jjt[k2]; 964 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %" PetscInt_FMT "\n",ee2); 965 newprimals[cum++] = ee2; 966 /* finally set the new corners */ 967 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 968 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %" PetscInt_FMT "\n",jj[k3]); 969 PetscCall(PetscBTSet(btv,jj[k3])); 970 } 971 } 972 } 973 } else { 974 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %" PetscInt_FMT "\n",jj[k]); 975 } 976 } 977 } 978 if (!has_candidates) { /* circular edge */ 979 PetscInt k, ee = idxs[0],*tmarks; 980 981 PetscCall(PetscCalloc1(ne,&tmarks)); 982 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %" PetscInt_FMT "\n",i); 983 for (k=ii[ee];k<ii[ee+1];k++) { 984 PetscInt k2; 985 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %" PetscInt_FMT "\n",jj[k]); 986 PetscCall(PetscBTSet(btv,jj[k])); 987 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 988 } 989 for (j=0;j<size;j++) { 990 if (tmarks[idxs[j]] > 1) { 991 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %" PetscInt_FMT "\n",idxs[j]); 992 newprimals[cum++] = idxs[j]; 993 } 994 } 995 PetscCall(PetscFree(tmarks)); 996 } 997 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 998 } 999 PetscCall(ISDestroy(&extcols[i])); 1000 } 1001 PetscCall(PetscFree(extcols)); 1002 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 1003 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1004 if (fl2g) { 1005 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1006 PetscCall(ISDestroy(&primals)); 1007 for (i=0;i<nee;i++) { 1008 PetscCall(ISDestroy(&eedges[i])); 1009 } 1010 PetscCall(PetscFree(eedges)); 1011 } 1012 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1013 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1014 PetscCall(PetscFree(newprimals)); 1015 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1016 PetscCall(ISDestroy(&primals)); 1017 PetscCall(PCBDDCAnalyzeInterface(pc)); 1018 pcbddc->mat_graph->twodim = PETSC_FALSE; 1019 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1020 if (fl2g) { 1021 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1022 PetscCall(PetscMalloc1(nee,&eedges)); 1023 for (i=0;i<nee;i++) { 1024 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1025 } 1026 } else { 1027 eedges = alleedges; 1028 primals = allprimals; 1029 } 1030 PetscCall(PetscCalloc1(nee,&extcols)); 1031 1032 /* Mark again */ 1033 PetscCall(PetscArrayzero(marks,ne)); 1034 for (i=0;i<nee;i++) { 1035 PetscInt size,mark = i+1; 1036 1037 PetscCall(ISGetLocalSize(eedges[i],&size)); 1038 PetscCall(ISGetIndices(eedges[i],&idxs)); 1039 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1040 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1041 } 1042 if (print) { 1043 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1044 PetscCall(ISView(primals,NULL)); 1045 } 1046 1047 /* Recompute extended cols */ 1048 eerr = PETSC_FALSE; 1049 for (i=0;i<nee;i++) { 1050 PetscInt size; 1051 1052 cum = 0; 1053 PetscCall(ISGetLocalSize(eedges[i],&size)); 1054 if (!size && nedfieldlocal) continue; 1055 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1056 PetscCall(ISGetIndices(eedges[i],&idxs)); 1057 for (j=0;j<size;j++) { 1058 PetscInt k,ee = idxs[j]; 1059 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1060 } 1061 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1062 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1063 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1064 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1065 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1066 if (cum != size -1) { 1067 if (print) { 1068 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1069 PetscCall(ISView(eedges[i],NULL)); 1070 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1071 PetscCall(ISView(extcols[i],NULL)); 1072 } 1073 eerr = PETSC_TRUE; 1074 } 1075 } 1076 } 1077 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1078 PetscCall(PetscFree2(extrow,gidxs)); 1079 PetscCall(PetscBTDestroy(&bter)); 1080 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1081 /* an error should not occur at this point */ 1082 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1083 1084 /* Check the number of endpoints */ 1085 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1086 PetscCall(PetscMalloc1(2*nee,&corners)); 1087 PetscCall(PetscMalloc1(nee,&cedges)); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size, found = 0, gc[2]; 1090 1091 /* init with defaults */ 1092 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1093 PetscCall(ISGetLocalSize(eedges[i],&size)); 1094 if (!size && nedfieldlocal) continue; 1095 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %" PetscInt_FMT,i); 1096 PetscCall(ISGetIndices(eedges[i],&idxs)); 1097 PetscCall(PetscBTMemzero(nv,btvc)); 1098 for (j=0;j<size;j++) { 1099 PetscInt k,ee = idxs[j]; 1100 for (k=ii[ee];k<ii[ee+1];k++) { 1101 PetscInt vv = jj[k]; 1102 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1103 PetscCheck(found != 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %" PetscInt_FMT,i); 1104 corners[i*2+found++] = vv; 1105 } 1106 } 1107 } 1108 if (found != 2) { 1109 PetscInt e; 1110 if (fl2g) { 1111 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1112 } else { 1113 e = idxs[0]; 1114 } 1115 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")",found,i,e,idxs[0]); 1116 } 1117 1118 /* get primal dof index on this coarse edge */ 1119 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1120 if (gc[0] > gc[1]) { 1121 PetscInt swap = corners[2*i]; 1122 corners[2*i] = corners[2*i+1]; 1123 corners[2*i+1] = swap; 1124 } 1125 cedges[i] = idxs[size-1]; 1126 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1127 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1128 } 1129 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1130 PetscCall(PetscBTDestroy(&btvc)); 1131 1132 if (PetscDefined(USE_DEBUG)) { 1133 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1134 not interfere with neighbouring coarse edges */ 1135 PetscCall(PetscMalloc1(nee+1,&emarks)); 1136 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1137 for (i=0;i<nv;i++) { 1138 PetscInt emax = 0,eemax = 0; 1139 1140 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1141 PetscCall(PetscArrayzero(emarks,nee+1)); 1142 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1143 for (j=1;j<nee+1;j++) { 1144 if (emax < emarks[j]) { 1145 emax = emarks[j]; 1146 eemax = j; 1147 } 1148 } 1149 /* not relevant for edges */ 1150 if (!eemax) continue; 1151 1152 for (j=ii[i];j<ii[i+1];j++) { 1153 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1154 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT,marks[jj[j]]-1,eemax,i,jj[j]); 1155 } 1156 } 1157 } 1158 PetscCall(PetscFree(emarks)); 1159 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1160 } 1161 1162 /* Compute extended rows indices for edge blocks of the change of basis */ 1163 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1164 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1165 extmem *= maxsize; 1166 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1167 PetscCall(PetscMalloc1(nee,&extrows)); 1168 PetscCall(PetscCalloc1(nee,&extrowcum)); 1169 for (i=0;i<nv;i++) { 1170 PetscInt mark = 0,size,start; 1171 1172 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1173 for (j=ii[i];j<ii[i+1];j++) 1174 if (marks[jj[j]] && !mark) 1175 mark = marks[jj[j]]; 1176 1177 /* not relevant */ 1178 if (!mark) continue; 1179 1180 /* import extended row */ 1181 mark--; 1182 start = mark*extmem+extrowcum[mark]; 1183 size = ii[i+1]-ii[i]; 1184 PetscCheck(extrowcum[mark] + size <= extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT,extrowcum[mark] + size,extmem); 1185 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1186 extrowcum[mark] += size; 1187 } 1188 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1189 PetscCall(MatDestroy(&lGt)); 1190 PetscCall(PetscFree(marks)); 1191 1192 /* Compress extrows */ 1193 cum = 0; 1194 for (i=0;i<nee;i++) { 1195 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1196 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1198 cum = PetscMax(cum,size); 1199 } 1200 PetscCall(PetscFree(extrowcum)); 1201 PetscCall(PetscBTDestroy(&btv)); 1202 PetscCall(PetscBTDestroy(&btvcand)); 1203 1204 /* Workspace for lapack inner calls and VecSetValues */ 1205 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1206 1207 /* Create change of basis matrix (preallocation can be improved) */ 1208 PetscCall(MatCreate(comm,&T)); 1209 PetscCall(MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,pc->pmat->rmap->N,pc->pmat->rmap->N)); 1210 PetscCall(MatSetType(T,MATAIJ)); 1211 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1212 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1213 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1214 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1215 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1216 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1217 1218 /* Defaults to identity */ 1219 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1220 PetscCall(VecSet(tvec,1.0)); 1221 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1222 PetscCall(VecDestroy(&tvec)); 1223 1224 /* Create discrete gradient for the coarser level if needed */ 1225 PetscCall(MatDestroy(&pcbddc->nedcG)); 1226 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1227 if (pcbddc->current_level < pcbddc->max_levels) { 1228 ISLocalToGlobalMapping cel2g,cvl2g; 1229 IS wis,gwis; 1230 PetscInt cnv,cne; 1231 1232 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1233 if (fl2g) { 1234 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1235 } else { 1236 PetscCall(PetscObjectReference((PetscObject)wis)); 1237 pcbddc->nedclocal = wis; 1238 } 1239 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1240 PetscCall(ISDestroy(&wis)); 1241 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1242 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1243 PetscCall(ISDestroy(&wis)); 1244 PetscCall(ISDestroy(&gwis)); 1245 1246 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1247 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1248 PetscCall(ISDestroy(&wis)); 1249 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1250 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1251 PetscCall(ISDestroy(&wis)); 1252 PetscCall(ISDestroy(&gwis)); 1253 1254 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1255 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1256 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1257 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1258 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1259 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1260 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1261 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1262 } 1263 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1264 1265 #if defined(PRINT_GDET) 1266 inc = 0; 1267 lev = pcbddc->current_level; 1268 #endif 1269 1270 /* Insert values in the change of basis matrix */ 1271 for (i=0;i<nee;i++) { 1272 Mat Gins = NULL, GKins = NULL; 1273 IS cornersis = NULL; 1274 PetscScalar cvals[2]; 1275 1276 if (pcbddc->nedcG) { 1277 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1278 } 1279 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1280 if (Gins && GKins) { 1281 const PetscScalar *data; 1282 const PetscInt *rows,*cols; 1283 PetscInt nrh,nch,nrc,ncc; 1284 1285 PetscCall(ISGetIndices(eedges[i],&cols)); 1286 /* H1 */ 1287 PetscCall(ISGetIndices(extrows[i],&rows)); 1288 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1289 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1290 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1291 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1292 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1293 /* complement */ 1294 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1295 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %" PetscInt_FMT,i); 1296 PetscCheck(ncc + nch == nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT,ncc,nch,nrc,i); 1297 PetscCheck(ncc == 1 || !pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT,i,ncc); 1298 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1299 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1300 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1301 1302 /* coarse discrete gradient */ 1303 if (pcbddc->nedcG) { 1304 PetscInt cols[2]; 1305 1306 cols[0] = 2*i; 1307 cols[1] = 2*i+1; 1308 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1309 } 1310 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1311 } 1312 PetscCall(ISDestroy(&extrows[i])); 1313 PetscCall(ISDestroy(&extcols[i])); 1314 PetscCall(ISDestroy(&cornersis)); 1315 PetscCall(MatDestroy(&Gins)); 1316 PetscCall(MatDestroy(&GKins)); 1317 } 1318 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1319 1320 /* Start assembling */ 1321 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1322 if (pcbddc->nedcG) { 1323 PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1324 } 1325 1326 /* Free */ 1327 if (fl2g) { 1328 PetscCall(ISDestroy(&primals)); 1329 for (i=0;i<nee;i++) { 1330 PetscCall(ISDestroy(&eedges[i])); 1331 } 1332 PetscCall(PetscFree(eedges)); 1333 } 1334 1335 /* hack mat_graph with primal dofs on the coarse edges */ 1336 { 1337 PCBDDCGraph graph = pcbddc->mat_graph; 1338 PetscInt *oqueue = graph->queue; 1339 PetscInt *ocptr = graph->cptr; 1340 PetscInt ncc,*idxs; 1341 1342 /* find first primal edge */ 1343 if (pcbddc->nedclocal) { 1344 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1345 } else { 1346 if (fl2g) { 1347 PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1348 } 1349 idxs = cedges; 1350 } 1351 cum = 0; 1352 while (cum < nee && cedges[cum] < 0) cum++; 1353 1354 /* adapt connected components */ 1355 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1356 graph->cptr[0] = 0; 1357 for (i=0,ncc=0;i<graph->ncc;i++) { 1358 PetscInt lc = ocptr[i+1]-ocptr[i]; 1359 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1360 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1361 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1362 ncc++; 1363 lc--; 1364 cum++; 1365 while (cum < nee && cedges[cum] < 0) cum++; 1366 } 1367 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1368 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1369 ncc++; 1370 } 1371 graph->ncc = ncc; 1372 if (pcbddc->nedclocal) { 1373 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1374 } 1375 PetscCall(PetscFree2(ocptr,oqueue)); 1376 } 1377 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1378 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1379 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1380 PetscCall(MatDestroy(&conn)); 1381 1382 PetscCall(ISDestroy(&nedfieldlocal)); 1383 PetscCall(PetscFree(extrow)); 1384 PetscCall(PetscFree2(work,rwork)); 1385 PetscCall(PetscFree(corners)); 1386 PetscCall(PetscFree(cedges)); 1387 PetscCall(PetscFree(extrows)); 1388 PetscCall(PetscFree(extcols)); 1389 PetscCall(MatDestroy(&lG)); 1390 1391 /* Complete assembling */ 1392 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1393 if (pcbddc->nedcG) { 1394 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1395 #if 0 1396 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1397 PetscCall(MatView(pcbddc->nedcG,NULL)); 1398 #endif 1399 } 1400 1401 /* set change of basis */ 1402 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1403 PetscCall(MatDestroy(&T)); 1404 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* the near-null space of BDDC carries information on quadrature weights, 1409 and these can be collinear -> so cheat with MatNullSpaceCreate 1410 and create a suitable set of basis vectors first */ 1411 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1412 { 1413 PetscInt i; 1414 1415 PetscFunctionBegin; 1416 for (i=0;i<nvecs;i++) { 1417 PetscInt first,last; 1418 1419 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1420 PetscCheck(last-first >= 2*nvecs || !has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1421 if (i>=first && i < last) { 1422 PetscScalar *data; 1423 PetscCall(VecGetArray(quad_vecs[i],&data)); 1424 if (!has_const) { 1425 data[i-first] = 1.; 1426 } else { 1427 data[2*i-first] = 1./PetscSqrtReal(2.); 1428 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1429 } 1430 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1431 } 1432 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1433 } 1434 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1435 for (i=0;i<nvecs;i++) { /* reset vectors */ 1436 PetscInt first,last; 1437 PetscCall(VecLockReadPop(quad_vecs[i])); 1438 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1439 if (i>=first && i < last) { 1440 PetscScalar *data; 1441 PetscCall(VecGetArray(quad_vecs[i],&data)); 1442 if (!has_const) { 1443 data[i-first] = 0.; 1444 } else { 1445 data[2*i-first] = 0.; 1446 data[2*i-first+1] = 0.; 1447 } 1448 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1449 } 1450 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1451 PetscCall(VecLockReadPush(quad_vecs[i])); 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1457 { 1458 Mat loc_divudotp; 1459 Vec p,v,vins,quad_vec,*quad_vecs; 1460 ISLocalToGlobalMapping map; 1461 PetscScalar *vals; 1462 const PetscScalar *array; 1463 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1464 PetscInt n_neigh,*neigh,*n_shared,**shared; 1465 PetscMPIInt rank; 1466 1467 PetscFunctionBegin; 1468 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1469 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1470 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1471 if (!maxneighs) { 1472 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1473 *nnsp = NULL; 1474 PetscFunctionReturn(0); 1475 } 1476 maxsize = 0; 1477 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1478 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1479 /* create vectors to hold quadrature weights */ 1480 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1481 if (!transpose) { 1482 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1483 } else { 1484 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1485 } 1486 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1487 PetscCall(VecDestroy(&quad_vec)); 1488 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1489 for (i=0;i<maxneighs;i++) { 1490 PetscCall(VecLockReadPop(quad_vecs[i])); 1491 } 1492 1493 /* compute local quad vec */ 1494 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1495 if (!transpose) { 1496 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1497 } else { 1498 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1499 } 1500 PetscCall(VecSet(p,1.)); 1501 if (!transpose) { 1502 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1503 } else { 1504 PetscCall(MatMult(loc_divudotp,p,v)); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 PetscCall(MatISGetLocalMat(A,&lA)); 1511 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1512 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1513 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1514 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1515 PetscCall(VecScatterDestroy(&sc)); 1516 } else { 1517 vins = v; 1518 } 1519 PetscCall(VecGetArrayRead(vins,&array)); 1520 PetscCall(VecDestroy(&p)); 1521 1522 /* insert in global quadrature vecs */ 1523 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1524 for (i=1;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1532 idx = -(idx+1); 1533 PetscCheck(idx >= 0 && idx < maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")",idx,maxneighs); 1534 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1535 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1536 } 1537 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1538 PetscCall(VecRestoreArrayRead(vins,&array)); 1539 if (vl2l) { 1540 PetscCall(VecDestroy(&vins)); 1541 } 1542 PetscCall(VecDestroy(&v)); 1543 PetscCall(PetscFree2(gidxs,vals)); 1544 1545 /* assemble near null space */ 1546 for (i=0;i<maxneighs;i++) { 1547 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1548 } 1549 for (i=0;i<maxneighs;i++) { 1550 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1551 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1552 PetscCall(VecLockReadPush(quad_vecs[i])); 1553 } 1554 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1559 { 1560 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1561 1562 PetscFunctionBegin; 1563 if (primalv) { 1564 if (pcbddc->user_primal_vertices_local) { 1565 IS list[2], newp; 1566 1567 list[0] = primalv; 1568 list[1] = pcbddc->user_primal_vertices_local; 1569 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1570 PetscCall(ISSortRemoveDups(newp)); 1571 PetscCall(ISDestroy(&list[1])); 1572 pcbddc->user_primal_vertices_local = newp; 1573 } else { 1574 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1581 { 1582 PetscInt f, *comp = (PetscInt *)ctx; 1583 1584 PetscFunctionBegin; 1585 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1590 { 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC"); 1598 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1599 PetscOptionsEnd(); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1602 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1603 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1604 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1605 if (monolithic) { /* just get block size to properly compute vertices */ 1606 if (pcbddc->vertex_size == 1) { 1607 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1608 } 1609 goto boundary; 1610 } 1611 1612 if (pcbddc->user_provided_isfordofs) { 1613 if (pcbddc->n_ISForDofs) { 1614 PetscInt i; 1615 1616 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1617 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1618 PetscInt bs; 1619 1620 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1621 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1622 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1623 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1624 } 1625 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1626 pcbddc->n_ISForDofs = 0; 1627 PetscCall(PetscFree(pcbddc->ISForDofs)); 1628 } 1629 } else { 1630 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1631 DM dm; 1632 1633 PetscCall(MatGetDM(pc->pmat, &dm)); 1634 if (!dm) { 1635 PetscCall(PCGetDM(pc, &dm)); 1636 } 1637 if (dm) { 1638 IS *fields; 1639 PetscInt nf,i; 1640 1641 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1642 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1643 for (i=0;i<nf;i++) { 1644 PetscInt bs; 1645 1646 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1647 PetscCall(ISGetBlockSize(fields[i],&bs)); 1648 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1649 PetscCall(ISDestroy(&fields[i])); 1650 } 1651 PetscCall(PetscFree(fields)); 1652 pcbddc->n_ISForDofsLocal = nf; 1653 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1654 PetscContainer c; 1655 1656 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1657 if (c) { 1658 MatISLocalFields lf; 1659 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1660 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1661 } else { /* fallback, create the default fields if bs > 1 */ 1662 PetscInt i, n = matis->A->rmap->n; 1663 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1664 if (i > 1) { 1665 pcbddc->n_ISForDofsLocal = i; 1666 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1667 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1668 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1669 } 1670 } 1671 } 1672 } 1673 } else { 1674 PetscInt i; 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1677 } 1678 } 1679 } 1680 1681 boundary: 1682 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1683 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1684 } else if (pcbddc->DirichletBoundariesLocal) { 1685 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1686 } 1687 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1688 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1689 } else if (pcbddc->NeumannBoundariesLocal) { 1690 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1691 } 1692 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1693 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1694 } 1695 PetscCall(VecDestroy(&global)); 1696 PetscCall(VecDestroy(&local)); 1697 /* detect local disconnected subdomains if requested (use matis->A) */ 1698 if (pcbddc->detect_disconnected) { 1699 IS primalv = NULL; 1700 PetscInt i; 1701 PetscBool filter = pcbddc->detect_disconnected_filter; 1702 1703 for (i=0;i<pcbddc->n_local_subs;i++) { 1704 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1705 } 1706 PetscCall(PetscFree(pcbddc->local_subs)); 1707 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1708 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1709 PetscCall(ISDestroy(&primalv)); 1710 } 1711 /* early stage corner detection */ 1712 { 1713 DM dm; 1714 1715 PetscCall(MatGetDM(pc->pmat,&dm)); 1716 if (!dm) { 1717 PetscCall(PCGetDM(pc,&dm)); 1718 } 1719 if (dm) { 1720 PetscBool isda; 1721 1722 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1723 if (isda) { 1724 ISLocalToGlobalMapping l2l; 1725 IS corners; 1726 Mat lA; 1727 PetscBool gl,lo; 1728 1729 { 1730 Vec cvec; 1731 const PetscScalar *coords; 1732 PetscInt dof,n,cdim; 1733 PetscBool memc = PETSC_TRUE; 1734 1735 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1736 PetscCall(DMGetCoordinates(dm,&cvec)); 1737 PetscCall(VecGetLocalSize(cvec,&n)); 1738 PetscCall(VecGetBlockSize(cvec,&cdim)); 1739 n /= cdim; 1740 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1741 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1742 PetscCall(VecGetArrayRead(cvec,&coords)); 1743 #if defined(PETSC_USE_COMPLEX) 1744 memc = PETSC_FALSE; 1745 #endif 1746 if (dof != 1) memc = PETSC_FALSE; 1747 if (memc) { 1748 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1749 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1750 PetscReal *bcoords = pcbddc->mat_graph->coords; 1751 PetscInt i, b, d; 1752 1753 for (i=0;i<n;i++) { 1754 for (b=0;b<dof;b++) { 1755 for (d=0;d<cdim;d++) { 1756 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1757 } 1758 } 1759 } 1760 } 1761 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1762 pcbddc->mat_graph->cdim = cdim; 1763 pcbddc->mat_graph->cnloc = dof*n; 1764 pcbddc->mat_graph->cloc = PETSC_FALSE; 1765 } 1766 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1767 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1768 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1769 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1770 lo = (PetscBool)(l2l && corners); 1771 PetscCall(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1772 if (gl) { /* From PETSc's DMDA */ 1773 const PetscInt *idx; 1774 PetscInt dof,bs,*idxout,n; 1775 1776 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1777 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1778 PetscCall(ISGetLocalSize(corners,&n)); 1779 PetscCall(ISGetIndices(corners,&idx)); 1780 if (bs == dof) { 1781 PetscCall(PetscMalloc1(n,&idxout)); 1782 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1783 } else { /* the original DMDA local-to-local map have been modified */ 1784 PetscInt i,d; 1785 1786 PetscCall(PetscMalloc1(dof*n,&idxout)); 1787 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1788 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1789 1790 bs = 1; 1791 n *= dof; 1792 } 1793 PetscCall(ISRestoreIndices(corners,&idx)); 1794 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1795 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1796 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1797 PetscCall(ISDestroy(&corners)); 1798 pcbddc->corner_selected = PETSC_TRUE; 1799 pcbddc->corner_selection = PETSC_TRUE; 1800 } 1801 if (corners) { 1802 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1803 } 1804 } 1805 } 1806 } 1807 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1808 DM dm; 1809 1810 PetscCall(MatGetDM(pc->pmat,&dm)); 1811 if (!dm) { 1812 PetscCall(PCGetDM(pc,&dm)); 1813 } 1814 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1815 Vec vcoords; 1816 PetscSection section; 1817 PetscReal *coords; 1818 PetscInt d,cdim,nl,nf,**ctxs; 1819 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1820 /* debug coordinates */ 1821 PetscViewer viewer; 1822 PetscBool flg; 1823 PetscViewerFormat format; 1824 const char *prefix; 1825 1826 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1827 PetscCall(DMGetLocalSection(dm,§ion)); 1828 PetscCall(PetscSectionGetNumFields(section,&nf)); 1829 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1830 PetscCall(VecGetLocalSize(vcoords,&nl)); 1831 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1832 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1833 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1834 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1835 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1836 1837 /* debug coordinates */ 1838 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 1839 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg)); 1840 if (flg) PetscCall(PetscViewerPushFormat(viewer,format)); 1841 for (d=0;d<cdim;d++) { 1842 PetscInt i; 1843 const PetscScalar *v; 1844 char name[16]; 1845 1846 for (i=0;i<nf;i++) ctxs[i][0] = d; 1847 PetscCall(PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d)); 1848 PetscCall(PetscObjectSetName((PetscObject)vcoords,name)); 1849 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1850 if (flg) PetscCall(VecView(vcoords,viewer)); 1851 PetscCall(VecGetArrayRead(vcoords,&v)); 1852 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1853 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1854 } 1855 PetscCall(VecDestroy(&vcoords)); 1856 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1857 PetscCall(PetscFree(coords)); 1858 PetscCall(PetscFree(ctxs[0])); 1859 PetscCall(PetscFree2(funcs,ctxs)); 1860 if (flg) { 1861 PetscCall(PetscViewerPopFormat(viewer)); 1862 PetscCall(PetscViewerDestroy(&viewer)); 1863 } 1864 } 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1870 { 1871 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1872 IS nis; 1873 const PetscInt *idxs; 1874 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1875 1876 PetscFunctionBegin; 1877 PetscCheck(mop == MPI_LAND || mop == MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1878 if (mop == MPI_LAND) { 1879 /* init rootdata with true */ 1880 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1881 } else { 1882 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1883 } 1884 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1885 PetscCall(ISGetLocalSize(*is,&nd)); 1886 PetscCall(ISGetIndices(*is,&idxs)); 1887 for (i=0;i<nd;i++) 1888 if (-1 < idxs[i] && idxs[i] < n) 1889 matis->sf_leafdata[idxs[i]] = 1; 1890 PetscCall(ISRestoreIndices(*is,&idxs)); 1891 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1892 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1893 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1894 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1895 if (mop == MPI_LAND) { 1896 PetscCall(PetscMalloc1(nd,&nidxs)); 1897 } else { 1898 PetscCall(PetscMalloc1(n,&nidxs)); 1899 } 1900 for (i=0,nnd=0;i<n;i++) 1901 if (matis->sf_leafdata[i]) 1902 nidxs[nnd++] = i; 1903 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1904 PetscCall(ISDestroy(is)); 1905 *is = nis; 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1910 { 1911 PC_IS *pcis = (PC_IS*)(pc->data); 1912 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1913 1914 PetscFunctionBegin; 1915 if (!pcbddc->benign_have_null) { 1916 PetscFunctionReturn(0); 1917 } 1918 if (pcbddc->ChangeOfBasisMatrix) { 1919 Vec swap; 1920 1921 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1922 swap = pcbddc->work_change; 1923 pcbddc->work_change = r; 1924 r = swap; 1925 } 1926 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1927 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1928 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1929 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1930 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1931 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1932 PetscCall(VecSet(z,0.)); 1933 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1934 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1935 if (pcbddc->ChangeOfBasisMatrix) { 1936 pcbddc->work_change = r; 1937 PetscCall(VecCopy(z,pcbddc->work_change)); 1938 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1944 { 1945 PCBDDCBenignMatMult_ctx ctx; 1946 PetscBool apply_right,apply_left,reset_x; 1947 1948 PetscFunctionBegin; 1949 PetscCall(MatShellGetContext(A,&ctx)); 1950 if (transpose) { 1951 apply_right = ctx->apply_left; 1952 apply_left = ctx->apply_right; 1953 } else { 1954 apply_right = ctx->apply_right; 1955 apply_left = ctx->apply_left; 1956 } 1957 reset_x = PETSC_FALSE; 1958 if (apply_right) { 1959 const PetscScalar *ax; 1960 PetscInt nl,i; 1961 1962 PetscCall(VecGetLocalSize(x,&nl)); 1963 PetscCall(VecGetArrayRead(x,&ax)); 1964 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1965 PetscCall(VecRestoreArrayRead(x,&ax)); 1966 for (i=0;i<ctx->benign_n;i++) { 1967 PetscScalar sum,val; 1968 const PetscInt *idxs; 1969 PetscInt nz,j; 1970 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1971 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1972 sum = 0.; 1973 if (ctx->apply_p0) { 1974 val = ctx->work[idxs[nz-1]]; 1975 for (j=0;j<nz-1;j++) { 1976 sum += ctx->work[idxs[j]]; 1977 ctx->work[idxs[j]] += val; 1978 } 1979 } else { 1980 for (j=0;j<nz-1;j++) { 1981 sum += ctx->work[idxs[j]]; 1982 } 1983 } 1984 ctx->work[idxs[nz-1]] -= sum; 1985 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1986 } 1987 PetscCall(VecPlaceArray(x,ctx->work)); 1988 reset_x = PETSC_TRUE; 1989 } 1990 if (transpose) { 1991 PetscCall(MatMultTranspose(ctx->A,x,y)); 1992 } else { 1993 PetscCall(MatMult(ctx->A,x,y)); 1994 } 1995 if (reset_x) { 1996 PetscCall(VecResetArray(x)); 1997 } 1998 if (apply_left) { 1999 PetscScalar *ay; 2000 PetscInt i; 2001 2002 PetscCall(VecGetArray(y,&ay)); 2003 for (i=0;i<ctx->benign_n;i++) { 2004 PetscScalar sum,val; 2005 const PetscInt *idxs; 2006 PetscInt nz,j; 2007 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 2008 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2009 val = -ay[idxs[nz-1]]; 2010 if (ctx->apply_p0) { 2011 sum = 0.; 2012 for (j=0;j<nz-1;j++) { 2013 sum += ay[idxs[j]]; 2014 ay[idxs[j]] += val; 2015 } 2016 ay[idxs[nz-1]] += sum; 2017 } else { 2018 for (j=0;j<nz-1;j++) { 2019 ay[idxs[j]] += val; 2020 } 2021 ay[idxs[nz-1]] = 0.; 2022 } 2023 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2024 } 2025 PetscCall(VecRestoreArray(y,&ay)); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2031 { 2032 PetscFunctionBegin; 2033 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2038 { 2039 PetscFunctionBegin; 2040 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2045 { 2046 PC_IS *pcis = (PC_IS*)pc->data; 2047 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2048 PCBDDCBenignMatMult_ctx ctx; 2049 2050 PetscFunctionBegin; 2051 if (!restore) { 2052 Mat A_IB,A_BI; 2053 PetscScalar *work; 2054 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2055 2056 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2057 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2058 PetscCall(PetscMalloc1(pcis->n,&work)); 2059 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2060 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2061 PetscCall(MatSetType(A_IB,MATSHELL)); 2062 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2063 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2064 PetscCall(PetscNew(&ctx)); 2065 PetscCall(MatShellSetContext(A_IB,ctx)); 2066 ctx->apply_left = PETSC_TRUE; 2067 ctx->apply_right = PETSC_FALSE; 2068 ctx->apply_p0 = PETSC_FALSE; 2069 ctx->benign_n = pcbddc->benign_n; 2070 if (reuse) { 2071 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2072 ctx->free = PETSC_FALSE; 2073 } else { /* TODO: could be optimized for successive solves */ 2074 ISLocalToGlobalMapping N_to_D; 2075 PetscInt i; 2076 2077 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2078 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2079 for (i=0;i<pcbddc->benign_n;i++) { 2080 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2081 } 2082 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2083 ctx->free = PETSC_TRUE; 2084 } 2085 ctx->A = pcis->A_IB; 2086 ctx->work = work; 2087 PetscCall(MatSetUp(A_IB)); 2088 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2089 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2090 pcis->A_IB = A_IB; 2091 2092 /* A_BI as A_IB^T */ 2093 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2094 pcbddc->benign_original_mat = pcis->A_BI; 2095 pcis->A_BI = A_BI; 2096 } else { 2097 if (!pcbddc->benign_original_mat) { 2098 PetscFunctionReturn(0); 2099 } 2100 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2101 PetscCall(MatDestroy(&pcis->A_IB)); 2102 pcis->A_IB = ctx->A; 2103 ctx->A = NULL; 2104 PetscCall(MatDestroy(&pcis->A_BI)); 2105 pcis->A_BI = pcbddc->benign_original_mat; 2106 pcbddc->benign_original_mat = NULL; 2107 if (ctx->free) { 2108 PetscInt i; 2109 for (i=0;i<ctx->benign_n;i++) { 2110 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2111 } 2112 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2113 } 2114 PetscCall(PetscFree(ctx->work)); 2115 PetscCall(PetscFree(ctx)); 2116 } 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /* used just in bddc debug mode */ 2121 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2122 { 2123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2124 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2125 Mat An; 2126 2127 PetscFunctionBegin; 2128 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2129 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2130 if (is1) { 2131 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2132 PetscCall(MatDestroy(&An)); 2133 } else { 2134 *B = An; 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* TODO: add reuse flag */ 2140 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2141 { 2142 Mat Bt; 2143 PetscScalar *a,*bdata; 2144 const PetscInt *ii,*ij; 2145 PetscInt m,n,i,nnz,*bii,*bij; 2146 PetscBool flg_row; 2147 2148 PetscFunctionBegin; 2149 PetscCall(MatGetSize(A,&n,&m)); 2150 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2151 PetscCall(MatSeqAIJGetArray(A,&a)); 2152 nnz = n; 2153 for (i=0;i<ii[n];i++) { 2154 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2155 } 2156 PetscCall(PetscMalloc1(n+1,&bii)); 2157 PetscCall(PetscMalloc1(nnz,&bij)); 2158 PetscCall(PetscMalloc1(nnz,&bdata)); 2159 nnz = 0; 2160 bii[0] = 0; 2161 for (i=0;i<n;i++) { 2162 PetscInt j; 2163 for (j=ii[i];j<ii[i+1];j++) { 2164 PetscScalar entry = a[j]; 2165 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2166 bij[nnz] = ij[j]; 2167 bdata[nnz] = entry; 2168 nnz++; 2169 } 2170 } 2171 bii[i+1] = nnz; 2172 } 2173 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2174 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2175 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2176 { 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2178 b->free_a = PETSC_TRUE; 2179 b->free_ij = PETSC_TRUE; 2180 } 2181 if (*B == A) { 2182 PetscCall(MatDestroy(&A)); 2183 } 2184 *B = Bt; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2189 { 2190 Mat B = NULL; 2191 DM dm; 2192 IS is_dummy,*cc_n; 2193 ISLocalToGlobalMapping l2gmap_dummy; 2194 PCBDDCGraph graph; 2195 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2196 PetscInt i,n; 2197 PetscInt *xadj,*adjncy; 2198 PetscBool isplex = PETSC_FALSE; 2199 2200 PetscFunctionBegin; 2201 if (ncc) *ncc = 0; 2202 if (cc) *cc = NULL; 2203 if (primalv) *primalv = NULL; 2204 PetscCall(PCBDDCGraphCreate(&graph)); 2205 PetscCall(MatGetDM(pc->pmat,&dm)); 2206 if (!dm) { 2207 PetscCall(PCGetDM(pc,&dm)); 2208 } 2209 if (dm) { 2210 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2211 } 2212 if (filter) isplex = PETSC_FALSE; 2213 2214 if (isplex) { /* this code has been modified from plexpartition.c */ 2215 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2216 PetscInt *adj = NULL; 2217 IS cellNumbering; 2218 const PetscInt *cellNum; 2219 PetscBool useCone, useClosure; 2220 PetscSection section; 2221 PetscSegBuffer adjBuffer; 2222 PetscSF sfPoint; 2223 2224 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2225 PetscCall(DMGetPointSF(dm, &sfPoint)); 2226 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2227 /* Build adjacency graph via a section/segbuffer */ 2228 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2229 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2230 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2231 /* Always use FVM adjacency to create partitioner graph */ 2232 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2233 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2234 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2235 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2236 for (n = 0, p = pStart; p < pEnd; p++) { 2237 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2238 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2239 adjSize = PETSC_DETERMINE; 2240 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2241 for (a = 0; a < adjSize; ++a) { 2242 const PetscInt point = adj[a]; 2243 if (pStart <= point && point < pEnd) { 2244 PetscInt *PETSC_RESTRICT pBuf; 2245 PetscCall(PetscSectionAddDof(section, p, 1)); 2246 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2247 *pBuf = point; 2248 } 2249 } 2250 n++; 2251 } 2252 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2253 /* Derive CSR graph from section/segbuffer */ 2254 PetscCall(PetscSectionSetUp(section)); 2255 PetscCall(PetscSectionGetStorageSize(section, &size)); 2256 PetscCall(PetscMalloc1(n+1, &xadj)); 2257 for (idx = 0, p = pStart; p < pEnd; p++) { 2258 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2259 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2260 } 2261 xadj[n] = size; 2262 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2263 /* Clean up */ 2264 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2265 PetscCall(PetscSectionDestroy(§ion)); 2266 PetscCall(PetscFree(adj)); 2267 graph->xadj = xadj; 2268 graph->adjncy = adjncy; 2269 } else { 2270 Mat A; 2271 PetscBool isseqaij, flg_row; 2272 2273 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2274 if (!A->rmap->N || !A->cmap->N) { 2275 PetscCall(PCBDDCGraphDestroy(&graph)); 2276 PetscFunctionReturn(0); 2277 } 2278 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2279 if (!isseqaij && filter) { 2280 PetscBool isseqdense; 2281 2282 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2283 if (!isseqdense) { 2284 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2285 } else { /* TODO: rectangular case and LDA */ 2286 PetscScalar *array; 2287 PetscReal chop=1.e-6; 2288 2289 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2290 PetscCall(MatDenseGetArray(B,&array)); 2291 PetscCall(MatGetSize(B,&n,NULL)); 2292 for (i=0;i<n;i++) { 2293 PetscInt j; 2294 for (j=i+1;j<n;j++) { 2295 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2296 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2297 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2298 } 2299 } 2300 PetscCall(MatDenseRestoreArray(B,&array)); 2301 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2302 } 2303 } else { 2304 PetscCall(PetscObjectReference((PetscObject)A)); 2305 B = A; 2306 } 2307 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2308 2309 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2310 if (filter) { 2311 PetscScalar *data; 2312 PetscInt j,cum; 2313 2314 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2315 PetscCall(MatSeqAIJGetArray(B,&data)); 2316 cum = 0; 2317 for (i=0;i<n;i++) { 2318 PetscInt t; 2319 2320 for (j=xadj[i];j<xadj[i+1];j++) { 2321 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2322 continue; 2323 } 2324 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2325 } 2326 t = xadj_filtered[i]; 2327 xadj_filtered[i] = cum; 2328 cum += t; 2329 } 2330 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2331 graph->xadj = xadj_filtered; 2332 graph->adjncy = adjncy_filtered; 2333 } else { 2334 graph->xadj = xadj; 2335 graph->adjncy = adjncy; 2336 } 2337 } 2338 /* compute local connected components using PCBDDCGraph */ 2339 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2340 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2341 PetscCall(ISDestroy(&is_dummy)); 2342 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2343 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2344 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2345 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2346 2347 /* partial clean up */ 2348 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2349 if (B) { 2350 PetscBool flg_row; 2351 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2352 PetscCall(MatDestroy(&B)); 2353 } 2354 if (isplex) { 2355 PetscCall(PetscFree(xadj)); 2356 PetscCall(PetscFree(adjncy)); 2357 } 2358 2359 /* get back data */ 2360 if (isplex) { 2361 if (ncc) *ncc = graph->ncc; 2362 if (cc || primalv) { 2363 Mat A; 2364 PetscBT btv,btvt; 2365 PetscSection subSection; 2366 PetscInt *ids,cum,cump,*cids,*pids; 2367 2368 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2369 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2370 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2371 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2372 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2373 2374 cids[0] = 0; 2375 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2376 PetscInt j; 2377 2378 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2379 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2380 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2381 2382 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2383 for (k = 0; k < 2*size; k += 2) { 2384 PetscInt s, pp, p = closure[k], off, dof, cdof; 2385 2386 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2387 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2388 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2389 for (s = 0; s < dof-cdof; s++) { 2390 if (PetscBTLookupSet(btvt,off+s)) continue; 2391 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2392 else pids[cump++] = off+s; /* cross-vertex */ 2393 } 2394 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2395 if (pp != p) { 2396 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2397 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2398 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2399 for (s = 0; s < dof-cdof; s++) { 2400 if (PetscBTLookupSet(btvt,off+s)) continue; 2401 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2402 else pids[cump++] = off+s; /* cross-vertex */ 2403 } 2404 } 2405 } 2406 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2407 } 2408 cids[i+1] = cum; 2409 /* mark dofs as already assigned */ 2410 for (j = cids[i]; j < cids[i+1]; j++) { 2411 PetscCall(PetscBTSet(btv,ids[j])); 2412 } 2413 } 2414 if (cc) { 2415 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2416 for (i = 0; i < graph->ncc; i++) { 2417 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2418 } 2419 *cc = cc_n; 2420 } 2421 if (primalv) { 2422 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2423 } 2424 PetscCall(PetscFree3(ids,cids,pids)); 2425 PetscCall(PetscBTDestroy(&btv)); 2426 PetscCall(PetscBTDestroy(&btvt)); 2427 } 2428 } else { 2429 if (ncc) *ncc = graph->ncc; 2430 if (cc) { 2431 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2432 for (i=0;i<graph->ncc;i++) { 2433 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2434 } 2435 *cc = cc_n; 2436 } 2437 } 2438 /* clean up graph */ 2439 graph->xadj = NULL; 2440 graph->adjncy = NULL; 2441 PetscCall(PCBDDCGraphDestroy(&graph)); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2446 { 2447 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2448 PC_IS* pcis = (PC_IS*)(pc->data); 2449 IS dirIS = NULL; 2450 PetscInt i; 2451 2452 PetscFunctionBegin; 2453 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2454 if (zerodiag) { 2455 Mat A; 2456 Vec vec3_N; 2457 PetscScalar *vals; 2458 const PetscInt *idxs; 2459 PetscInt nz,*count; 2460 2461 /* p0 */ 2462 PetscCall(VecSet(pcis->vec1_N,0.)); 2463 PetscCall(PetscMalloc1(pcis->n,&vals)); 2464 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2465 PetscCall(ISGetIndices(zerodiag,&idxs)); 2466 for (i=0;i<nz;i++) vals[i] = 1.; 2467 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2468 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2469 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2470 /* v_I */ 2471 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2472 for (i=0;i<nz;i++) vals[i] = 0.; 2473 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2474 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2475 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2476 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2477 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2478 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2479 if (dirIS) { 2480 PetscInt n; 2481 2482 PetscCall(ISGetLocalSize(dirIS,&n)); 2483 PetscCall(ISGetIndices(dirIS,&idxs)); 2484 for (i=0;i<n;i++) vals[i] = 0.; 2485 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2486 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2487 } 2488 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2489 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2490 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2491 PetscCall(VecSet(vec3_N,0.)); 2492 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2493 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2494 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2495 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0])); 2496 PetscCall(PetscFree(vals)); 2497 PetscCall(VecDestroy(&vec3_N)); 2498 2499 /* there should not be any pressure dofs lying on the interface */ 2500 PetscCall(PetscCalloc1(pcis->n,&count)); 2501 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2502 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2503 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2504 PetscCall(ISGetIndices(zerodiag,&idxs)); 2505 for (i=0;i<nz;i++) PetscCheck(!count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof",idxs[i]); 2506 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2507 PetscCall(PetscFree(count)); 2508 } 2509 PetscCall(ISDestroy(&dirIS)); 2510 2511 /* check PCBDDCBenignGetOrSetP0 */ 2512 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2513 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2514 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2515 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2516 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2517 for (i=0;i<pcbddc->benign_n;i++) { 2518 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2519 PetscCheck(val == -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i)); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2525 { 2526 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2527 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2528 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2529 PetscInt nz,n,benign_n,bsp = 1; 2530 PetscInt *interior_dofs,n_interior_dofs,nneu; 2531 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2532 2533 PetscFunctionBegin; 2534 if (reuse) goto project_b0; 2535 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2536 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2537 for (n=0;n<pcbddc->benign_n;n++) { 2538 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2539 } 2540 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2541 has_null_pressures = PETSC_TRUE; 2542 have_null = PETSC_TRUE; 2543 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2544 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2545 Checks if all the pressure dofs in each subdomain have a zero diagonal 2546 If not, a change of basis on pressures is not needed 2547 since the local Schur complements are already SPD 2548 */ 2549 if (pcbddc->n_ISForDofsLocal) { 2550 IS iP = NULL; 2551 PetscInt p,*pp; 2552 PetscBool flg; 2553 2554 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2555 n = pcbddc->n_ISForDofsLocal; 2556 PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC"); 2557 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2558 PetscOptionsEnd(); 2559 if (!flg) { 2560 n = 1; 2561 pp[0] = pcbddc->n_ISForDofsLocal-1; 2562 } 2563 2564 bsp = 0; 2565 for (p=0;p<n;p++) { 2566 PetscInt bs; 2567 2568 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %" PetscInt_FMT,pp[p]); 2569 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2570 bsp += bs; 2571 } 2572 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2573 bsp = 0; 2574 for (p=0;p<n;p++) { 2575 const PetscInt *idxs; 2576 PetscInt b,bs,npl,*bidxs; 2577 2578 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2579 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2580 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2581 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2582 for (b=0;b<bs;b++) { 2583 PetscInt i; 2584 2585 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2586 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2587 bsp++; 2588 } 2589 PetscCall(PetscFree(bidxs)); 2590 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2591 } 2592 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2593 2594 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2595 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2596 if (iP) { 2597 IS newpressures; 2598 2599 PetscCall(ISDifference(pressures,iP,&newpressures)); 2600 PetscCall(ISDestroy(&pressures)); 2601 pressures = newpressures; 2602 } 2603 PetscCall(ISSorted(pressures,&sorted)); 2604 if (!sorted) { 2605 PetscCall(ISSort(pressures)); 2606 } 2607 PetscCall(PetscFree(pp)); 2608 } 2609 2610 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2611 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2612 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2613 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2614 PetscCall(ISSorted(zerodiag,&sorted)); 2615 if (!sorted) { 2616 PetscCall(ISSort(zerodiag)); 2617 } 2618 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2619 zerodiag_save = zerodiag; 2620 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2621 if (!nz) { 2622 if (n) have_null = PETSC_FALSE; 2623 has_null_pressures = PETSC_FALSE; 2624 PetscCall(ISDestroy(&zerodiag)); 2625 } 2626 recompute_zerodiag = PETSC_FALSE; 2627 2628 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2629 zerodiag_subs = NULL; 2630 benign_n = 0; 2631 n_interior_dofs = 0; 2632 interior_dofs = NULL; 2633 nneu = 0; 2634 if (pcbddc->NeumannBoundariesLocal) { 2635 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2636 } 2637 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2638 if (checkb) { /* need to compute interior nodes */ 2639 PetscInt n,i,j; 2640 PetscInt n_neigh,*neigh,*n_shared,**shared; 2641 PetscInt *iwork; 2642 2643 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2644 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2645 PetscCall(PetscCalloc1(n,&iwork)); 2646 PetscCall(PetscMalloc1(n,&interior_dofs)); 2647 for (i=1;i<n_neigh;i++) 2648 for (j=0;j<n_shared[i];j++) 2649 iwork[shared[i][j]] += 1; 2650 for (i=0;i<n;i++) 2651 if (!iwork[i]) 2652 interior_dofs[n_interior_dofs++] = i; 2653 PetscCall(PetscFree(iwork)); 2654 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2655 } 2656 if (has_null_pressures) { 2657 IS *subs; 2658 PetscInt nsubs,i,j,nl; 2659 const PetscInt *idxs; 2660 PetscScalar *array; 2661 Vec *work; 2662 2663 subs = pcbddc->local_subs; 2664 nsubs = pcbddc->n_local_subs; 2665 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2666 if (checkb) { 2667 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2668 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2669 PetscCall(ISGetIndices(zerodiag,&idxs)); 2670 /* work[0] = 1_p */ 2671 PetscCall(VecSet(work[0],0.)); 2672 PetscCall(VecGetArray(work[0],&array)); 2673 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2674 PetscCall(VecRestoreArray(work[0],&array)); 2675 /* work[0] = 1_v */ 2676 PetscCall(VecSet(work[1],1.)); 2677 PetscCall(VecGetArray(work[1],&array)); 2678 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2679 PetscCall(VecRestoreArray(work[1],&array)); 2680 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2681 } 2682 2683 if (nsubs > 1 || bsp > 1) { 2684 IS *is; 2685 PetscInt b,totb; 2686 2687 totb = bsp; 2688 is = bsp > 1 ? bzerodiag : &zerodiag; 2689 nsubs = PetscMax(nsubs,1); 2690 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2691 for (b=0;b<totb;b++) { 2692 for (i=0;i<nsubs;i++) { 2693 ISLocalToGlobalMapping l2g; 2694 IS t_zerodiag_subs; 2695 PetscInt nl; 2696 2697 if (subs) { 2698 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2699 } else { 2700 IS tis; 2701 2702 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2703 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2704 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2705 PetscCall(ISDestroy(&tis)); 2706 } 2707 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2708 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2709 if (nl) { 2710 PetscBool valid = PETSC_TRUE; 2711 2712 if (checkb) { 2713 PetscCall(VecSet(matis->x,0)); 2714 PetscCall(ISGetLocalSize(subs[i],&nl)); 2715 PetscCall(ISGetIndices(subs[i],&idxs)); 2716 PetscCall(VecGetArray(matis->x,&array)); 2717 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2718 PetscCall(VecRestoreArray(matis->x,&array)); 2719 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2720 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2721 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2722 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2723 PetscCall(VecGetArray(matis->y,&array)); 2724 for (j=0;j<n_interior_dofs;j++) { 2725 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2726 valid = PETSC_FALSE; 2727 break; 2728 } 2729 } 2730 PetscCall(VecRestoreArray(matis->y,&array)); 2731 } 2732 if (valid && nneu) { 2733 const PetscInt *idxs; 2734 PetscInt nzb; 2735 2736 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2737 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2738 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2739 if (nzb) valid = PETSC_FALSE; 2740 } 2741 if (valid && pressures) { 2742 IS t_pressure_subs,tmp; 2743 PetscInt i1,i2; 2744 2745 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2746 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2747 PetscCall(ISGetLocalSize(tmp,&i1)); 2748 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2749 if (i2 != i1) valid = PETSC_FALSE; 2750 PetscCall(ISDestroy(&t_pressure_subs)); 2751 PetscCall(ISDestroy(&tmp)); 2752 } 2753 if (valid) { 2754 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2755 benign_n++; 2756 } else recompute_zerodiag = PETSC_TRUE; 2757 } 2758 PetscCall(ISDestroy(&t_zerodiag_subs)); 2759 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2760 } 2761 } 2762 } else { /* there's just one subdomain (or zero if they have not been detected */ 2763 PetscBool valid = PETSC_TRUE; 2764 2765 if (nneu) valid = PETSC_FALSE; 2766 if (valid && pressures) { 2767 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2768 } 2769 if (valid && checkb) { 2770 PetscCall(MatMult(matis->A,work[0],matis->x)); 2771 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2772 PetscCall(VecGetArray(matis->x,&array)); 2773 for (j=0;j<n_interior_dofs;j++) { 2774 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2775 valid = PETSC_FALSE; 2776 break; 2777 } 2778 } 2779 PetscCall(VecRestoreArray(matis->x,&array)); 2780 } 2781 if (valid) { 2782 benign_n = 1; 2783 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2784 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2785 zerodiag_subs[0] = zerodiag; 2786 } 2787 } 2788 if (checkb) { 2789 PetscCall(VecDestroyVecs(2,&work)); 2790 } 2791 } 2792 PetscCall(PetscFree(interior_dofs)); 2793 2794 if (!benign_n) { 2795 PetscInt n; 2796 2797 PetscCall(ISDestroy(&zerodiag)); 2798 recompute_zerodiag = PETSC_FALSE; 2799 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2800 if (n) have_null = PETSC_FALSE; 2801 } 2802 2803 /* final check for null pressures */ 2804 if (zerodiag && pressures) { 2805 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2806 } 2807 2808 if (recompute_zerodiag) { 2809 PetscCall(ISDestroy(&zerodiag)); 2810 if (benign_n == 1) { 2811 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2812 zerodiag = zerodiag_subs[0]; 2813 } else { 2814 PetscInt i,nzn,*new_idxs; 2815 2816 nzn = 0; 2817 for (i=0;i<benign_n;i++) { 2818 PetscInt ns; 2819 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2820 nzn += ns; 2821 } 2822 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2823 nzn = 0; 2824 for (i=0;i<benign_n;i++) { 2825 PetscInt ns,*idxs; 2826 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2827 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2828 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2829 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2830 nzn += ns; 2831 } 2832 PetscCall(PetscSortInt(nzn,new_idxs)); 2833 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2834 } 2835 have_null = PETSC_FALSE; 2836 } 2837 2838 /* determines if the coarse solver will be singular or not */ 2839 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2840 2841 /* Prepare matrix to compute no-net-flux */ 2842 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2843 Mat A,loc_divudotp; 2844 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2845 IS row,col,isused = NULL; 2846 PetscInt M,N,n,st,n_isused; 2847 2848 if (pressures) { 2849 isused = pressures; 2850 } else { 2851 isused = zerodiag_save; 2852 } 2853 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2854 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2855 PetscCall(MatGetLocalSize(A,&n,NULL)); 2856 PetscCheck(isused || (n == 0),PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2857 n_isused = 0; 2858 if (isused) { 2859 PetscCall(ISGetLocalSize(isused,&n_isused)); 2860 } 2861 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2862 st = st-n_isused; 2863 if (n) { 2864 const PetscInt *gidxs; 2865 2866 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2867 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2868 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2869 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2870 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2871 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2872 } else { 2873 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2874 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2875 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2876 } 2877 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2878 PetscCall(ISGetSize(row,&M)); 2879 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2880 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2881 PetscCall(ISDestroy(&row)); 2882 PetscCall(ISDestroy(&col)); 2883 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2884 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2885 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2886 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2887 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2888 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2889 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2890 PetscCall(MatDestroy(&loc_divudotp)); 2891 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2892 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2893 } 2894 PetscCall(ISDestroy(&zerodiag_save)); 2895 PetscCall(ISDestroy(&pressures)); 2896 if (bzerodiag) { 2897 PetscInt i; 2898 2899 for (i=0;i<bsp;i++) { 2900 PetscCall(ISDestroy(&bzerodiag[i])); 2901 } 2902 PetscCall(PetscFree(bzerodiag)); 2903 } 2904 pcbddc->benign_n = benign_n; 2905 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2906 2907 /* determines if the problem has subdomains with 0 pressure block */ 2908 have_null = (PetscBool)(!!pcbddc->benign_n); 2909 PetscCall(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2910 2911 project_b0: 2912 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2913 /* change of basis and p0 dofs */ 2914 if (pcbddc->benign_n) { 2915 PetscInt i,s,*nnz; 2916 2917 /* local change of basis for pressures */ 2918 PetscCall(MatDestroy(&pcbddc->benign_change)); 2919 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2920 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2921 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2922 PetscCall(PetscMalloc1(n,&nnz)); 2923 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2924 for (i=0;i<pcbddc->benign_n;i++) { 2925 const PetscInt *idxs; 2926 PetscInt nzs,j; 2927 2928 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2929 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2930 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2931 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2932 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2933 } 2934 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2935 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2936 PetscCall(PetscFree(nnz)); 2937 /* set identity by default */ 2938 for (i=0;i<n;i++) { 2939 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2940 } 2941 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2942 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2943 /* set change on pressures */ 2944 for (s=0;s<pcbddc->benign_n;s++) { 2945 PetscScalar *array; 2946 const PetscInt *idxs; 2947 PetscInt nzs; 2948 2949 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2950 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2951 for (i=0;i<nzs-1;i++) { 2952 PetscScalar vals[2]; 2953 PetscInt cols[2]; 2954 2955 cols[0] = idxs[i]; 2956 cols[1] = idxs[nzs-1]; 2957 vals[0] = 1.; 2958 vals[1] = 1.; 2959 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2960 } 2961 PetscCall(PetscMalloc1(nzs,&array)); 2962 for (i=0;i<nzs-1;i++) array[i] = -1.; 2963 array[nzs-1] = 1.; 2964 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2965 /* store local idxs for p0 */ 2966 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2967 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2968 PetscCall(PetscFree(array)); 2969 } 2970 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2971 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2972 2973 /* project if needed */ 2974 if (pcbddc->benign_change_explicit) { 2975 Mat M; 2976 2977 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2978 PetscCall(MatDestroy(&pcbddc->local_mat)); 2979 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2980 PetscCall(MatDestroy(&M)); 2981 } 2982 /* store global idxs for p0 */ 2983 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2984 } 2985 *zerodiaglocal = zerodiag; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2990 { 2991 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2992 PetscScalar *array; 2993 2994 PetscFunctionBegin; 2995 if (!pcbddc->benign_sf) { 2996 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2997 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2998 } 2999 if (get) { 3000 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 3001 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3002 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 3003 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 3004 } else { 3005 PetscCall(VecGetArray(v,&array)); 3006 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3007 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 3008 PetscCall(VecRestoreArray(v,&array)); 3009 } 3010 PetscFunctionReturn(0); 3011 } 3012 3013 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3014 { 3015 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3016 3017 PetscFunctionBegin; 3018 /* TODO: add error checking 3019 - avoid nested pop (or push) calls. 3020 - cannot push before pop. 3021 - cannot call this if pcbddc->local_mat is NULL 3022 */ 3023 if (!pcbddc->benign_n) { 3024 PetscFunctionReturn(0); 3025 } 3026 if (pop) { 3027 if (pcbddc->benign_change_explicit) { 3028 IS is_p0; 3029 MatReuse reuse; 3030 3031 /* extract B_0 */ 3032 reuse = MAT_INITIAL_MATRIX; 3033 if (pcbddc->benign_B0) { 3034 reuse = MAT_REUSE_MATRIX; 3035 } 3036 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3037 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3038 /* remove rows and cols from local problem */ 3039 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3040 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3041 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3042 PetscCall(ISDestroy(&is_p0)); 3043 } else { 3044 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3045 PetscScalar *vals; 3046 PetscInt i,n,*idxs_ins; 3047 3048 PetscCall(VecGetLocalSize(matis->y,&n)); 3049 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3050 if (!pcbddc->benign_B0) { 3051 PetscInt *nnz; 3052 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3053 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3054 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3055 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3056 for (i=0;i<pcbddc->benign_n;i++) { 3057 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3058 nnz[i] = n - nnz[i]; 3059 } 3060 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3061 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3062 PetscCall(PetscFree(nnz)); 3063 } 3064 3065 for (i=0;i<pcbddc->benign_n;i++) { 3066 PetscScalar *array; 3067 PetscInt *idxs,j,nz,cum; 3068 3069 PetscCall(VecSet(matis->x,0.)); 3070 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3071 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3072 for (j=0;j<nz;j++) vals[j] = 1.; 3073 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3074 PetscCall(VecAssemblyBegin(matis->x)); 3075 PetscCall(VecAssemblyEnd(matis->x)); 3076 PetscCall(VecSet(matis->y,0.)); 3077 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3078 PetscCall(VecGetArray(matis->y,&array)); 3079 cum = 0; 3080 for (j=0;j<n;j++) { 3081 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3082 vals[cum] = array[j]; 3083 idxs_ins[cum] = j; 3084 cum++; 3085 } 3086 } 3087 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3088 PetscCall(VecRestoreArray(matis->y,&array)); 3089 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3090 } 3091 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3092 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3093 PetscCall(PetscFree2(idxs_ins,vals)); 3094 } 3095 } else { /* push */ 3096 if (pcbddc->benign_change_explicit) { 3097 PetscInt i; 3098 3099 for (i=0;i<pcbddc->benign_n;i++) { 3100 PetscScalar *B0_vals; 3101 PetscInt *B0_cols,B0_ncol; 3102 3103 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3104 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3105 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3106 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3107 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3108 } 3109 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3110 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3111 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3112 } 3113 PetscFunctionReturn(0); 3114 } 3115 3116 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3117 { 3118 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3119 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3120 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3121 PetscBLASInt *B_iwork,*B_ifail; 3122 PetscScalar *work,lwork; 3123 PetscScalar *St,*S,*eigv; 3124 PetscScalar *Sarray,*Starray; 3125 PetscReal *eigs,thresh,lthresh,uthresh; 3126 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3127 PetscBool allocated_S_St,upart; 3128 #if defined(PETSC_USE_COMPLEX) 3129 PetscReal *rwork; 3130 #endif 3131 3132 PetscFunctionBegin; 3133 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 3134 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3135 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3136 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3137 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3138 3139 if (pcbddc->dbg_flag) { 3140 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3141 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3142 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3143 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3144 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3145 } 3146 3147 if (pcbddc->dbg_flag) { 3148 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3149 } 3150 3151 /* max size of subsets */ 3152 mss = 0; 3153 for (i=0;i<sub_schurs->n_subs;i++) { 3154 PetscInt subset_size; 3155 3156 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3157 mss = PetscMax(mss,subset_size); 3158 } 3159 3160 /* min/max and threshold */ 3161 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3162 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3163 nmax = PetscMax(nmin,nmax); 3164 allocated_S_St = PETSC_FALSE; 3165 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3166 allocated_S_St = PETSC_TRUE; 3167 } 3168 3169 /* allocate lapack workspace */ 3170 cum = cum2 = 0; 3171 maxneigs = 0; 3172 for (i=0;i<sub_schurs->n_subs;i++) { 3173 PetscInt n,subset_size; 3174 3175 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3176 n = PetscMin(subset_size,nmax); 3177 cum += subset_size; 3178 cum2 += subset_size*n; 3179 maxneigs = PetscMax(maxneigs,n); 3180 } 3181 lwork = 0; 3182 if (mss) { 3183 if (sub_schurs->is_symmetric) { 3184 PetscScalar sdummy = 0.; 3185 PetscBLASInt B_itype = 1; 3186 PetscBLASInt B_N = mss, idummy = 0; 3187 PetscReal rdummy = 0.,zero = 0.0; 3188 PetscReal eps = 0.0; /* dlamch? */ 3189 3190 B_lwork = -1; 3191 /* some implementations may complain about NULL pointers, even if we are querying */ 3192 S = &sdummy; 3193 St = &sdummy; 3194 eigs = &rdummy; 3195 eigv = &sdummy; 3196 B_iwork = &idummy; 3197 B_ifail = &idummy; 3198 #if defined(PETSC_USE_COMPLEX) 3199 rwork = &rdummy; 3200 #endif 3201 thresh = 1.0; 3202 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3203 #if defined(PETSC_USE_COMPLEX) 3204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3205 #else 3206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3207 #endif 3208 PetscCheck(B_ierr == 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3209 PetscCall(PetscFPTrapPop()); 3210 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3211 } 3212 3213 nv = 0; 3214 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3215 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3216 } 3217 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3218 if (allocated_S_St) { 3219 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3220 } 3221 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3222 #if defined(PETSC_USE_COMPLEX) 3223 PetscCall(PetscMalloc1(7*mss,&rwork)); 3224 #endif 3225 PetscCall(PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3226 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3227 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3228 nv+cum,&pcbddc->adaptive_constraints_idxs, 3229 nv+cum2,&pcbddc->adaptive_constraints_data)); 3230 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3231 3232 maxneigs = 0; 3233 cum = cumarray = 0; 3234 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3235 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3236 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3237 const PetscInt *idxs; 3238 3239 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3240 for (cum=0;cum<nv;cum++) { 3241 pcbddc->adaptive_constraints_n[cum] = 1; 3242 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3243 pcbddc->adaptive_constraints_data[cum] = 1.0; 3244 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3245 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3246 } 3247 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3248 } 3249 3250 if (mss) { /* multilevel */ 3251 if (sub_schurs->gdsw) { 3252 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3253 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3254 } else { 3255 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3256 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3257 } 3258 } 3259 3260 lthresh = pcbddc->adaptive_threshold[0]; 3261 uthresh = pcbddc->adaptive_threshold[1]; 3262 upart = pcbddc->use_deluxe_scaling; 3263 for (i=0;i<sub_schurs->n_subs;i++) { 3264 const PetscInt *idxs; 3265 PetscReal upper,lower; 3266 PetscInt j,subset_size,eigs_start = 0; 3267 PetscBLASInt B_N; 3268 PetscBool same_data = PETSC_FALSE; 3269 PetscBool scal = PETSC_FALSE; 3270 3271 if (upart) { 3272 upper = PETSC_MAX_REAL; 3273 lower = uthresh; 3274 } else { 3275 if (sub_schurs->gdsw) { 3276 upper = uthresh; 3277 lower = PETSC_MIN_REAL; 3278 } else { 3279 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3280 upper = 1./uthresh; 3281 lower = 0.; 3282 } 3283 } 3284 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3285 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3286 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3287 /* this is experimental: we assume the dofs have been properly grouped to have 3288 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3289 if (!sub_schurs->is_posdef) { 3290 Mat T; 3291 3292 for (j=0;j<subset_size;j++) { 3293 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3294 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3295 PetscCall(MatScale(T,-1.0)); 3296 PetscCall(MatDestroy(&T)); 3297 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3298 PetscCall(MatScale(T,-1.0)); 3299 PetscCall(MatDestroy(&T)); 3300 if (sub_schurs->change_primal_sub) { 3301 PetscInt nz,k; 3302 const PetscInt *idxs; 3303 3304 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3305 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3306 for (k=0;k<nz;k++) { 3307 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3308 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3309 } 3310 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3311 } 3312 scal = PETSC_TRUE; 3313 break; 3314 } 3315 } 3316 } 3317 3318 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3319 if (sub_schurs->is_symmetric) { 3320 PetscInt j,k; 3321 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3322 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3323 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3324 } 3325 for (j=0;j<subset_size;j++) { 3326 for (k=j;k<subset_size;k++) { 3327 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3328 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3329 } 3330 } 3331 } else { 3332 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3333 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3334 } 3335 } else { 3336 S = Sarray + cumarray; 3337 St = Starray + cumarray; 3338 } 3339 /* see if we can save some work */ 3340 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3341 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3342 } 3343 3344 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3345 B_neigs = 0; 3346 } else { 3347 if (sub_schurs->is_symmetric) { 3348 PetscBLASInt B_itype = 1; 3349 PetscBLASInt B_IL, B_IU; 3350 PetscReal eps = -1.0; /* dlamch? */ 3351 PetscInt nmin_s; 3352 PetscBool compute_range; 3353 3354 B_neigs = 0; 3355 compute_range = (PetscBool)!same_data; 3356 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3357 3358 if (pcbddc->dbg_flag) { 3359 PetscInt nc = 0; 3360 3361 if (sub_schurs->change_primal_sub) { 3362 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3363 } 3364 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3365 } 3366 3367 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3368 if (compute_range) { 3369 3370 /* ask for eigenvalues larger than thresh */ 3371 if (sub_schurs->is_posdef) { 3372 #if defined(PETSC_USE_COMPLEX) 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3374 #else 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3376 #endif 3377 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3378 } else { /* no theory so far, but it works nicely */ 3379 PetscInt recipe = 0,recipe_m = 1; 3380 PetscReal bb[2]; 3381 3382 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3383 switch (recipe) { 3384 case 0: 3385 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3386 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3387 #if defined(PETSC_USE_COMPLEX) 3388 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3389 #else 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3391 #endif 3392 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3393 break; 3394 case 1: 3395 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3396 #if defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3400 #endif 3401 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3402 if (!scal) { 3403 PetscBLASInt B_neigs2 = 0; 3404 3405 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3406 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3407 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3408 #if defined(PETSC_USE_COMPLEX) 3409 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3410 #else 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3412 #endif 3413 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3414 B_neigs += B_neigs2; 3415 } 3416 break; 3417 case 2: 3418 if (scal) { 3419 bb[0] = PETSC_MIN_REAL; 3420 bb[1] = 0; 3421 #if defined(PETSC_USE_COMPLEX) 3422 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3423 #else 3424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3425 #endif 3426 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3427 } else { 3428 PetscBLASInt B_neigs2 = 0; 3429 PetscBool import = PETSC_FALSE; 3430 3431 lthresh = PetscMax(lthresh,0.0); 3432 if (lthresh > 0.0) { 3433 bb[0] = PETSC_MIN_REAL; 3434 bb[1] = lthresh*lthresh; 3435 3436 import = PETSC_TRUE; 3437 #if defined(PETSC_USE_COMPLEX) 3438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3439 #else 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3441 #endif 3442 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3443 } 3444 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3445 bb[1] = PETSC_MAX_REAL; 3446 if (import) { 3447 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3448 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3449 } 3450 #if defined(PETSC_USE_COMPLEX) 3451 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3452 #else 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3454 #endif 3455 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3456 B_neigs += B_neigs2; 3457 } 3458 break; 3459 case 3: 3460 if (scal) { 3461 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3462 } else { 3463 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3464 } 3465 if (!scal) { 3466 bb[0] = uthresh; 3467 bb[1] = PETSC_MAX_REAL; 3468 #if defined(PETSC_USE_COMPLEX) 3469 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3470 #else 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3472 #endif 3473 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3474 } 3475 if (recipe_m > 0 && B_N - B_neigs > 0) { 3476 PetscBLASInt B_neigs2 = 0; 3477 3478 B_IL = 1; 3479 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3480 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3481 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3482 #if defined(PETSC_USE_COMPLEX) 3483 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3484 #else 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3486 #endif 3487 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3488 B_neigs += B_neigs2; 3489 } 3490 break; 3491 case 4: 3492 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3493 #if defined(PETSC_USE_COMPLEX) 3494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3495 #else 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3497 #endif 3498 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3499 { 3500 PetscBLASInt B_neigs2 = 0; 3501 3502 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3503 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3504 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3505 #if defined(PETSC_USE_COMPLEX) 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3507 #else 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3509 #endif 3510 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3511 B_neigs += B_neigs2; 3512 } 3513 break; 3514 case 5: /* same as before: first compute all eigenvalues, then filter */ 3515 #if defined(PETSC_USE_COMPLEX) 3516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3517 #else 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3519 #endif 3520 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3521 { 3522 PetscInt e,k,ne; 3523 for (e=0,ne=0;e<B_neigs;e++) { 3524 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3525 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3526 eigs[ne] = eigs[e]; 3527 ne++; 3528 } 3529 } 3530 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3531 B_neigs = ne; 3532 } 3533 break; 3534 default: 3535 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %" PetscInt_FMT,recipe); 3536 } 3537 } 3538 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3539 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3540 B_IL = 1; 3541 #if defined(PETSC_USE_COMPLEX) 3542 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3543 #else 3544 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3545 #endif 3546 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3547 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3548 PetscInt k; 3549 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3550 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3551 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3552 nmin = nmax; 3553 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3554 for (k=0;k<nmax;k++) { 3555 eigs[k] = 1./PETSC_SMALL; 3556 eigv[k*(subset_size+1)] = 1.0; 3557 } 3558 } 3559 PetscCall(PetscFPTrapPop()); 3560 if (B_ierr) { 3561 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3562 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3563 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3564 } 3565 3566 if (B_neigs > nmax) { 3567 if (pcbddc->dbg_flag) { 3568 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n",B_neigs,nmax)); 3569 } 3570 if (upart) eigs_start = scal ? 0 : B_neigs-nmax; 3571 B_neigs = nmax; 3572 } 3573 3574 nmin_s = PetscMin(nmin,B_N); 3575 if (B_neigs < nmin_s) { 3576 PetscBLASInt B_neigs2 = 0; 3577 3578 if (upart) { 3579 if (scal) { 3580 B_IU = nmin_s; 3581 B_IL = B_neigs + 1; 3582 } else { 3583 B_IL = B_N - nmin_s + 1; 3584 B_IU = B_N - B_neigs; 3585 } 3586 } else { 3587 B_IL = B_neigs + 1; 3588 B_IU = nmin_s; 3589 } 3590 if (pcbddc->dbg_flag) { 3591 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3592 } 3593 if (sub_schurs->is_symmetric) { 3594 PetscInt j,k; 3595 for (j=0;j<subset_size;j++) { 3596 for (k=j;k<subset_size;k++) { 3597 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3598 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3599 } 3600 } 3601 } else { 3602 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3603 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3604 } 3605 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3606 #if defined(PETSC_USE_COMPLEX) 3607 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3608 #else 3609 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3610 #endif 3611 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3612 PetscCall(PetscFPTrapPop()); 3613 B_neigs += B_neigs2; 3614 } 3615 if (B_ierr) { 3616 PetscCheck(B_ierr >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT,-B_ierr); 3617 PetscCheck(B_ierr > B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge",B_ierr); 3618 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite",B_ierr-B_N-1); 3619 } 3620 if (pcbddc->dbg_flag) { 3621 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %" PetscBLASInt_FMT " eigs\n",B_neigs)); 3622 for (j=0;j<B_neigs;j++) { 3623 if (!sub_schurs->gdsw) { 3624 if (eigs[j] == 0.0) { 3625 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3626 } else { 3627 if (upart) { 3628 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)eigs[j+eigs_start])); 3629 } else { 3630 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",(double)(1./eigs[j+eigs_start]))); 3631 } 3632 } 3633 } else { 3634 double pg = (double)eigs[j+eigs_start]; 3635 if (pg < 2*PETSC_SMALL) pg = 0.0; 3636 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",pg)); 3637 } 3638 } 3639 } 3640 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3641 } 3642 /* change the basis back to the original one */ 3643 if (sub_schurs->change) { 3644 Mat change,phi,phit; 3645 3646 if (pcbddc->dbg_flag > 2) { 3647 PetscInt ii; 3648 for (ii=0;ii<B_neigs;ii++) { 3649 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3650 for (j=0;j<B_N;j++) { 3651 #if defined(PETSC_USE_COMPLEX) 3652 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3653 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3654 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3655 #else 3656 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)(eigv[(ii+eigs_start)*subset_size+j]))); 3657 #endif 3658 } 3659 } 3660 } 3661 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3662 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3663 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3664 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3665 PetscCall(MatDestroy(&phit)); 3666 PetscCall(MatDestroy(&phi)); 3667 } 3668 maxneigs = PetscMax(B_neigs,maxneigs); 3669 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3670 if (B_neigs) { 3671 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3672 3673 if (pcbddc->dbg_flag > 1) { 3674 PetscInt ii; 3675 for (ii=0;ii<B_neigs;ii++) { 3676 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n",ii,B_neigs,B_N)); 3677 for (j=0;j<B_N;j++) { 3678 #if defined(PETSC_USE_COMPLEX) 3679 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3680 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3681 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",(double)r,(double)c)); 3682 #else 3683 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",(double)PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]))); 3684 #endif 3685 } 3686 } 3687 } 3688 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3689 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3690 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3691 cum++; 3692 } 3693 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3694 /* shift for next computation */ 3695 cumarray += subset_size*subset_size; 3696 } 3697 if (pcbddc->dbg_flag) { 3698 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3699 } 3700 3701 if (mss) { 3702 if (sub_schurs->gdsw) { 3703 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray)); 3704 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3705 } else { 3706 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3707 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3708 /* destroy matrices (junk) */ 3709 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3710 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3711 } 3712 } 3713 if (allocated_S_St) { 3714 PetscCall(PetscFree2(S,St)); 3715 } 3716 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3717 #if defined(PETSC_USE_COMPLEX) 3718 PetscCall(PetscFree(rwork)); 3719 #endif 3720 if (pcbddc->dbg_flag) { 3721 PetscInt maxneigs_r; 3722 PetscCall(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3723 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %" PetscInt_FMT "\n",maxneigs_r)); 3724 } 3725 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3726 PetscFunctionReturn(0); 3727 } 3728 3729 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3730 { 3731 PetscScalar *coarse_submat_vals; 3732 3733 PetscFunctionBegin; 3734 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3735 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3736 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3737 3738 /* Setup local neumann solver ksp_R */ 3739 /* PCBDDCSetUpLocalScatters should be called first! */ 3740 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3741 3742 /* 3743 Setup local correction and local part of coarse basis. 3744 Gives back the dense local part of the coarse matrix in column major ordering 3745 */ 3746 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3747 3748 /* Compute total number of coarse nodes and setup coarse solver */ 3749 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3750 3751 /* free */ 3752 PetscCall(PetscFree(coarse_submat_vals)); 3753 PetscFunctionReturn(0); 3754 } 3755 3756 PetscErrorCode PCBDDCResetCustomization(PC pc) 3757 { 3758 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3759 3760 PetscFunctionBegin; 3761 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3762 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3763 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3764 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3765 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3766 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3767 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3768 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3769 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3770 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3771 PetscFunctionReturn(0); 3772 } 3773 3774 PetscErrorCode PCBDDCResetTopography(PC pc) 3775 { 3776 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3777 PetscInt i; 3778 3779 PetscFunctionBegin; 3780 PetscCall(MatDestroy(&pcbddc->nedcG)); 3781 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3782 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3783 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3784 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3785 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3786 PetscCall(VecDestroy(&pcbddc->work_change)); 3787 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3788 PetscCall(MatDestroy(&pcbddc->divudotp)); 3789 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3790 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3791 for (i=0;i<pcbddc->n_local_subs;i++) { 3792 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3793 } 3794 pcbddc->n_local_subs = 0; 3795 PetscCall(PetscFree(pcbddc->local_subs)); 3796 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3797 pcbddc->graphanalyzed = PETSC_FALSE; 3798 pcbddc->recompute_topography = PETSC_TRUE; 3799 pcbddc->corner_selected = PETSC_FALSE; 3800 PetscFunctionReturn(0); 3801 } 3802 3803 PetscErrorCode PCBDDCResetSolvers(PC pc) 3804 { 3805 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3806 3807 PetscFunctionBegin; 3808 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3809 if (pcbddc->coarse_phi_B) { 3810 PetscScalar *array; 3811 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3812 PetscCall(PetscFree(array)); 3813 } 3814 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3815 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3816 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3817 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3818 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3819 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3820 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3821 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3822 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3823 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3824 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3825 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3826 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3827 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3828 PetscCall(KSPReset(pcbddc->ksp_D)); 3829 PetscCall(KSPReset(pcbddc->ksp_R)); 3830 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3831 PetscCall(MatDestroy(&pcbddc->local_mat)); 3832 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3833 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3834 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3835 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3836 PetscCall(MatDestroy(&pcbddc->benign_change)); 3837 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3838 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3839 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3840 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3841 if (pcbddc->benign_zerodiag_subs) { 3842 PetscInt i; 3843 for (i=0;i<pcbddc->benign_n;i++) { 3844 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3845 } 3846 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3847 } 3848 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3849 PetscFunctionReturn(0); 3850 } 3851 3852 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3853 { 3854 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3855 PC_IS *pcis = (PC_IS*)pc->data; 3856 VecType impVecType; 3857 PetscInt n_constraints,n_R,old_size; 3858 3859 PetscFunctionBegin; 3860 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3861 n_R = pcis->n - pcbddc->n_vertices; 3862 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3863 /* local work vectors (try to avoid unneeded work)*/ 3864 /* R nodes */ 3865 old_size = -1; 3866 if (pcbddc->vec1_R) { 3867 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3868 } 3869 if (n_R != old_size) { 3870 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3871 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3872 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3873 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3874 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3875 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3876 } 3877 /* local primal dofs */ 3878 old_size = -1; 3879 if (pcbddc->vec1_P) { 3880 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3881 } 3882 if (pcbddc->local_primal_size != old_size) { 3883 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3884 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3885 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3886 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3887 } 3888 /* local explicit constraints */ 3889 old_size = -1; 3890 if (pcbddc->vec1_C) { 3891 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3892 } 3893 if (n_constraints && n_constraints != old_size) { 3894 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3895 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3896 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3897 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3898 } 3899 PetscFunctionReturn(0); 3900 } 3901 3902 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3903 { 3904 /* pointers to pcis and pcbddc */ 3905 PC_IS* pcis = (PC_IS*)pc->data; 3906 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3907 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3908 /* submatrices of local problem */ 3909 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3910 /* submatrices of local coarse problem */ 3911 Mat S_VV,S_CV,S_VC,S_CC; 3912 /* working matrices */ 3913 Mat C_CR; 3914 /* additional working stuff */ 3915 PC pc_R; 3916 Mat F,Brhs = NULL; 3917 Vec dummy_vec; 3918 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3919 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3920 PetscScalar *work; 3921 PetscInt *idx_V_B; 3922 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3923 PetscInt i,n_R,n_D,n_B; 3924 PetscScalar one=1.0,m_one=-1.0; 3925 3926 PetscFunctionBegin; 3927 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3928 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3929 3930 /* Set Non-overlapping dimensions */ 3931 n_vertices = pcbddc->n_vertices; 3932 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3933 n_B = pcis->n_B; 3934 n_D = pcis->n - n_B; 3935 n_R = pcis->n - n_vertices; 3936 3937 /* vertices in boundary numbering */ 3938 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3939 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3940 PetscCheck(i == n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,n_vertices,i); 3941 3942 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3943 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3944 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3945 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3946 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3947 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3948 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3949 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3950 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3951 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3952 3953 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3954 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3955 PetscCall(PCSetUp(pc_R)); 3956 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3957 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3958 lda_rhs = n_R; 3959 need_benign_correction = PETSC_FALSE; 3960 if (isLU || isCHOL) { 3961 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3962 } else if (sub_schurs && sub_schurs->reuse_solver) { 3963 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3964 MatFactorType type; 3965 3966 F = reuse_solver->F; 3967 PetscCall(MatGetFactorType(F,&type)); 3968 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3969 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3970 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3971 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3972 } else F = NULL; 3973 3974 /* determine if we can use a sparse right-hand side */ 3975 sparserhs = PETSC_FALSE; 3976 if (F) { 3977 MatSolverType solver; 3978 3979 PetscCall(MatFactorGetSolverType(F,&solver)); 3980 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3981 } 3982 3983 /* allocate workspace */ 3984 n = 0; 3985 if (n_constraints) { 3986 n += lda_rhs*n_constraints; 3987 } 3988 if (n_vertices) { 3989 n = PetscMax(2*lda_rhs*n_vertices,n); 3990 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3991 } 3992 if (!pcbddc->symmetric_primal) { 3993 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3994 } 3995 PetscCall(PetscMalloc1(n,&work)); 3996 3997 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3998 dummy_vec = NULL; 3999 if (need_benign_correction && lda_rhs != n_R && F) { 4000 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 4001 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 4002 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 4003 } 4004 4005 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4006 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4007 4008 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4009 if (n_constraints) { 4010 Mat M3,C_B; 4011 IS is_aux; 4012 4013 /* Extract constraints on R nodes: C_{CR} */ 4014 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 4015 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 4016 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4017 4018 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4019 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4020 if (!sparserhs) { 4021 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 4022 for (i=0;i<n_constraints;i++) { 4023 const PetscScalar *row_cmat_values; 4024 const PetscInt *row_cmat_indices; 4025 PetscInt size_of_constraint,j; 4026 4027 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4028 for (j=0;j<size_of_constraint;j++) { 4029 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4030 } 4031 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 4032 } 4033 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 4034 } else { 4035 Mat tC_CR; 4036 4037 PetscCall(MatScale(C_CR,-1.0)); 4038 if (lda_rhs != n_R) { 4039 PetscScalar *aa; 4040 PetscInt r,*ii,*jj; 4041 PetscBool done; 4042 4043 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4044 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4045 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4046 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4047 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4048 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4049 } else { 4050 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4051 tC_CR = C_CR; 4052 } 4053 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4054 PetscCall(MatDestroy(&tC_CR)); 4055 } 4056 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4057 if (F) { 4058 if (need_benign_correction) { 4059 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4060 4061 /* rhs is already zero on interior dofs, no need to change the rhs */ 4062 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4063 } 4064 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4065 if (need_benign_correction) { 4066 PetscScalar *marr; 4067 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4068 4069 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4070 if (lda_rhs != n_R) { 4071 for (i=0;i<n_constraints;i++) { 4072 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4073 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4074 PetscCall(VecResetArray(dummy_vec)); 4075 } 4076 } else { 4077 for (i=0;i<n_constraints;i++) { 4078 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4079 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4080 PetscCall(VecResetArray(pcbddc->vec1_R)); 4081 } 4082 } 4083 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4084 } 4085 } else { 4086 PetscScalar *marr; 4087 4088 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4089 for (i=0;i<n_constraints;i++) { 4090 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4091 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4092 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4093 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4094 PetscCall(VecResetArray(pcbddc->vec1_R)); 4095 PetscCall(VecResetArray(pcbddc->vec2_R)); 4096 } 4097 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4098 } 4099 if (sparserhs) { 4100 PetscCall(MatScale(C_CR,-1.0)); 4101 } 4102 PetscCall(MatDestroy(&Brhs)); 4103 if (!pcbddc->switch_static) { 4104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4105 for (i=0;i<n_constraints;i++) { 4106 Vec r, b; 4107 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4108 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4109 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4110 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4111 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4112 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4113 } 4114 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4115 } else { 4116 if (lda_rhs != n_R) { 4117 IS dummy; 4118 4119 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4120 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4121 PetscCall(ISDestroy(&dummy)); 4122 } else { 4123 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4124 pcbddc->local_auxmat2 = local_auxmat2_R; 4125 } 4126 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4127 } 4128 PetscCall(ISDestroy(&is_aux)); 4129 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4130 PetscCall(MatScale(M3,m_one)); 4131 if (isCHOL) { 4132 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4133 } else { 4134 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4135 } 4136 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4137 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4138 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4139 PetscCall(MatDestroy(&C_B)); 4140 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4141 PetscCall(MatDestroy(&M3)); 4142 } 4143 4144 /* Get submatrices from subdomain matrix */ 4145 if (n_vertices) { 4146 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4147 PetscBool oldpin; 4148 #endif 4149 PetscBool isaij; 4150 IS is_aux; 4151 4152 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4153 IS tis; 4154 4155 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4156 PetscCall(ISSort(tis)); 4157 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4158 PetscCall(ISDestroy(&tis)); 4159 } else { 4160 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4161 } 4162 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4163 oldpin = pcbddc->local_mat->boundtocpu; 4164 #endif 4165 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4166 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4167 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4168 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4169 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4170 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4171 } 4172 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4173 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4174 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4175 #endif 4176 PetscCall(ISDestroy(&is_aux)); 4177 } 4178 4179 /* Matrix of coarse basis functions (local) */ 4180 if (pcbddc->coarse_phi_B) { 4181 PetscInt on_B,on_primal,on_D=n_D; 4182 if (pcbddc->coarse_phi_D) { 4183 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4184 } 4185 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4186 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4187 PetscScalar *marray; 4188 4189 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4190 PetscCall(PetscFree(marray)); 4191 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4192 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4193 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4194 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4195 } 4196 } 4197 4198 if (!pcbddc->coarse_phi_B) { 4199 PetscScalar *marr; 4200 4201 /* memory size */ 4202 n = n_B*pcbddc->local_primal_size; 4203 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4204 if (!pcbddc->symmetric_primal) n *= 2; 4205 PetscCall(PetscCalloc1(n,&marr)); 4206 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4207 marr += n_B*pcbddc->local_primal_size; 4208 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4209 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4210 marr += n_D*pcbddc->local_primal_size; 4211 } 4212 if (!pcbddc->symmetric_primal) { 4213 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4214 marr += n_B*pcbddc->local_primal_size; 4215 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4216 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4217 } 4218 } else { 4219 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4220 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4221 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4222 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4223 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4224 } 4225 } 4226 } 4227 4228 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4229 p0_lidx_I = NULL; 4230 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4231 const PetscInt *idxs; 4232 4233 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4234 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4235 for (i=0;i<pcbddc->benign_n;i++) { 4236 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4237 } 4238 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4239 } 4240 4241 /* vertices */ 4242 if (n_vertices) { 4243 PetscBool restoreavr = PETSC_FALSE; 4244 4245 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4246 4247 if (n_R) { 4248 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4249 PetscBLASInt B_N,B_one = 1; 4250 const PetscScalar *x; 4251 PetscScalar *y; 4252 4253 PetscCall(MatScale(A_RV,m_one)); 4254 if (need_benign_correction) { 4255 ISLocalToGlobalMapping RtoN; 4256 IS is_p0; 4257 PetscInt *idxs_p0,n; 4258 4259 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4260 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4261 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4262 PetscCheck(n == pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT,n,pcbddc->benign_n); 4263 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4264 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4265 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4266 PetscCall(ISDestroy(&is_p0)); 4267 } 4268 4269 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4270 if (!sparserhs || need_benign_correction) { 4271 if (lda_rhs == n_R) { 4272 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4273 } else { 4274 PetscScalar *av,*array; 4275 const PetscInt *xadj,*adjncy; 4276 PetscInt n; 4277 PetscBool flg_row; 4278 4279 array = work+lda_rhs*n_vertices; 4280 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4281 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4282 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4283 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4284 for (i=0;i<n;i++) { 4285 PetscInt j; 4286 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4287 } 4288 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4289 PetscCall(MatDestroy(&A_RV)); 4290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4291 } 4292 if (need_benign_correction) { 4293 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4294 PetscScalar *marr; 4295 4296 PetscCall(MatDenseGetArray(A_RV,&marr)); 4297 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4298 4299 | 0 0 0 | (V) 4300 L = | 0 0 -1 | (P-p0) 4301 | 0 0 -1 | (p0) 4302 4303 */ 4304 for (i=0;i<reuse_solver->benign_n;i++) { 4305 const PetscScalar *vals; 4306 const PetscInt *idxs,*idxs_zero; 4307 PetscInt n,j,nz; 4308 4309 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4310 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4311 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4312 for (j=0;j<n;j++) { 4313 PetscScalar val = vals[j]; 4314 PetscInt k,col = idxs[j]; 4315 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4316 } 4317 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4318 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4319 } 4320 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4321 } 4322 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4323 Brhs = A_RV; 4324 } else { 4325 Mat tA_RVT,A_RVT; 4326 4327 if (!pcbddc->symmetric_primal) { 4328 /* A_RV already scaled by -1 */ 4329 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4330 } else { 4331 restoreavr = PETSC_TRUE; 4332 PetscCall(MatScale(A_VR,-1.0)); 4333 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4334 A_RVT = A_VR; 4335 } 4336 if (lda_rhs != n_R) { 4337 PetscScalar *aa; 4338 PetscInt r,*ii,*jj; 4339 PetscBool done; 4340 4341 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4342 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4343 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4344 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4345 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4346 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4347 } else { 4348 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4349 tA_RVT = A_RVT; 4350 } 4351 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4352 PetscCall(MatDestroy(&tA_RVT)); 4353 PetscCall(MatDestroy(&A_RVT)); 4354 } 4355 if (F) { 4356 /* need to correct the rhs */ 4357 if (need_benign_correction) { 4358 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4359 PetscScalar *marr; 4360 4361 PetscCall(MatDenseGetArray(Brhs,&marr)); 4362 if (lda_rhs != n_R) { 4363 for (i=0;i<n_vertices;i++) { 4364 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4365 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4366 PetscCall(VecResetArray(dummy_vec)); 4367 } 4368 } else { 4369 for (i=0;i<n_vertices;i++) { 4370 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4371 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4372 PetscCall(VecResetArray(pcbddc->vec1_R)); 4373 } 4374 } 4375 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4376 } 4377 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4378 if (restoreavr) { 4379 PetscCall(MatScale(A_VR,-1.0)); 4380 } 4381 /* need to correct the solution */ 4382 if (need_benign_correction) { 4383 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4384 PetscScalar *marr; 4385 4386 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4387 if (lda_rhs != n_R) { 4388 for (i=0;i<n_vertices;i++) { 4389 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4390 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4391 PetscCall(VecResetArray(dummy_vec)); 4392 } 4393 } else { 4394 for (i=0;i<n_vertices;i++) { 4395 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4396 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4397 PetscCall(VecResetArray(pcbddc->vec1_R)); 4398 } 4399 } 4400 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4401 } 4402 } else { 4403 PetscCall(MatDenseGetArray(Brhs,&y)); 4404 for (i=0;i<n_vertices;i++) { 4405 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4406 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4407 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4408 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4409 PetscCall(VecResetArray(pcbddc->vec1_R)); 4410 PetscCall(VecResetArray(pcbddc->vec2_R)); 4411 } 4412 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4413 } 4414 PetscCall(MatDestroy(&A_RV)); 4415 PetscCall(MatDestroy(&Brhs)); 4416 /* S_VV and S_CV */ 4417 if (n_constraints) { 4418 Mat B; 4419 4420 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4421 for (i=0;i<n_vertices;i++) { 4422 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4423 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4424 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4425 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4426 PetscCall(VecResetArray(pcis->vec1_B)); 4427 PetscCall(VecResetArray(pcbddc->vec1_R)); 4428 } 4429 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4430 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4431 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4432 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4433 PetscCall(MatProductSetFromOptions(S_CV)); 4434 PetscCall(MatProductSymbolic(S_CV)); 4435 PetscCall(MatProductNumeric(S_CV)); 4436 PetscCall(MatProductClear(S_CV)); 4437 4438 PetscCall(MatDestroy(&B)); 4439 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4440 /* Reuse B = local_auxmat2_R * S_CV */ 4441 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4442 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4443 PetscCall(MatProductSetFromOptions(B)); 4444 PetscCall(MatProductSymbolic(B)); 4445 PetscCall(MatProductNumeric(B)); 4446 4447 PetscCall(MatScale(S_CV,m_one)); 4448 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4449 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4450 PetscCall(MatDestroy(&B)); 4451 } 4452 if (lda_rhs != n_R) { 4453 PetscCall(MatDestroy(&A_RRmA_RV)); 4454 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4455 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4456 } 4457 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4458 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4459 if (need_benign_correction) { 4460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4461 PetscScalar *marr,*sums; 4462 4463 PetscCall(PetscMalloc1(n_vertices,&sums)); 4464 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4465 for (i=0;i<reuse_solver->benign_n;i++) { 4466 const PetscScalar *vals; 4467 const PetscInt *idxs,*idxs_zero; 4468 PetscInt n,j,nz; 4469 4470 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4471 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4472 for (j=0;j<n_vertices;j++) { 4473 PetscInt k; 4474 sums[j] = 0.; 4475 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4476 } 4477 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4478 for (j=0;j<n;j++) { 4479 PetscScalar val = vals[j]; 4480 PetscInt k; 4481 for (k=0;k<n_vertices;k++) { 4482 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4483 } 4484 } 4485 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4486 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4487 } 4488 PetscCall(PetscFree(sums)); 4489 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4490 PetscCall(MatDestroy(&A_RV_bcorr)); 4491 } 4492 PetscCall(MatDestroy(&A_RRmA_RV)); 4493 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4494 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4495 PetscCall(MatDenseGetArray(S_VVt,&y)); 4496 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4497 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4498 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4499 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4500 PetscCall(MatDestroy(&S_VVt)); 4501 } else { 4502 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4503 } 4504 PetscCall(MatDestroy(&A_VV)); 4505 4506 /* coarse basis functions */ 4507 for (i=0;i<n_vertices;i++) { 4508 Vec v; 4509 PetscScalar one = 1.0,zero = 0.0; 4510 4511 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4512 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4513 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4514 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4515 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4516 PetscMPIInt rank; 4517 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4518 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4519 } 4520 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4521 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4522 PetscCall(VecAssemblyEnd(v)); 4523 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4524 4525 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4526 PetscInt j; 4527 4528 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4529 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4530 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4531 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4532 PetscMPIInt rank; 4533 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4534 PetscCheck(rank <= 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4535 } 4536 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4537 PetscCall(VecAssemblyBegin(v)); 4538 PetscCall(VecAssemblyEnd(v)); 4539 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4540 } 4541 PetscCall(VecResetArray(pcbddc->vec1_R)); 4542 } 4543 /* if n_R == 0 the object is not destroyed */ 4544 PetscCall(MatDestroy(&A_RV)); 4545 } 4546 PetscCall(VecDestroy(&dummy_vec)); 4547 4548 if (n_constraints) { 4549 Mat B; 4550 4551 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4552 PetscCall(MatScale(S_CC,m_one)); 4553 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4554 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4555 PetscCall(MatProductSetFromOptions(B)); 4556 PetscCall(MatProductSymbolic(B)); 4557 PetscCall(MatProductNumeric(B)); 4558 4559 PetscCall(MatScale(S_CC,m_one)); 4560 if (n_vertices) { 4561 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4562 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4563 } else { 4564 Mat S_VCt; 4565 4566 if (lda_rhs != n_R) { 4567 PetscCall(MatDestroy(&B)); 4568 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4569 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4570 } 4571 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4572 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4573 PetscCall(MatDestroy(&S_VCt)); 4574 } 4575 } 4576 PetscCall(MatDestroy(&B)); 4577 /* coarse basis functions */ 4578 for (i=0;i<n_constraints;i++) { 4579 Vec v; 4580 4581 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4582 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4583 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4584 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4585 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4586 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4587 PetscInt j; 4588 PetscScalar zero = 0.0; 4589 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4590 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4591 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4592 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4593 PetscCall(VecAssemblyBegin(v)); 4594 PetscCall(VecAssemblyEnd(v)); 4595 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4596 } 4597 PetscCall(VecResetArray(pcbddc->vec1_R)); 4598 } 4599 } 4600 if (n_constraints) { 4601 PetscCall(MatDestroy(&local_auxmat2_R)); 4602 } 4603 PetscCall(PetscFree(p0_lidx_I)); 4604 4605 /* coarse matrix entries relative to B_0 */ 4606 if (pcbddc->benign_n) { 4607 Mat B0_B,B0_BPHI; 4608 IS is_dummy; 4609 const PetscScalar *data; 4610 PetscInt j; 4611 4612 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4613 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4614 PetscCall(ISDestroy(&is_dummy)); 4615 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4616 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4617 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4618 for (j=0;j<pcbddc->benign_n;j++) { 4619 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4620 for (i=0;i<pcbddc->local_primal_size;i++) { 4621 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4622 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4623 } 4624 } 4625 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4626 PetscCall(MatDestroy(&B0_B)); 4627 PetscCall(MatDestroy(&B0_BPHI)); 4628 } 4629 4630 /* compute other basis functions for non-symmetric problems */ 4631 if (!pcbddc->symmetric_primal) { 4632 Mat B_V=NULL,B_C=NULL; 4633 PetscScalar *marray; 4634 4635 if (n_constraints) { 4636 Mat S_CCT,C_CRT; 4637 4638 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4639 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4640 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4641 PetscCall(MatDestroy(&S_CCT)); 4642 if (n_vertices) { 4643 Mat S_VCT; 4644 4645 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4646 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4647 PetscCall(MatDestroy(&S_VCT)); 4648 } 4649 PetscCall(MatDestroy(&C_CRT)); 4650 } else { 4651 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4652 } 4653 if (n_vertices && n_R) { 4654 PetscScalar *av,*marray; 4655 const PetscInt *xadj,*adjncy; 4656 PetscInt n; 4657 PetscBool flg_row; 4658 4659 /* B_V = B_V - A_VR^T */ 4660 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4661 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4662 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4663 PetscCall(MatDenseGetArray(B_V,&marray)); 4664 for (i=0;i<n;i++) { 4665 PetscInt j; 4666 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4667 } 4668 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4669 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4670 PetscCall(MatDestroy(&A_VR)); 4671 } 4672 4673 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4674 if (n_vertices) { 4675 PetscCall(MatDenseGetArray(B_V,&marray)); 4676 for (i=0;i<n_vertices;i++) { 4677 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4678 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4679 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4680 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4681 PetscCall(VecResetArray(pcbddc->vec1_R)); 4682 PetscCall(VecResetArray(pcbddc->vec2_R)); 4683 } 4684 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4685 } 4686 if (B_C) { 4687 PetscCall(MatDenseGetArray(B_C,&marray)); 4688 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4689 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4690 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4691 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4692 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4693 PetscCall(VecResetArray(pcbddc->vec1_R)); 4694 PetscCall(VecResetArray(pcbddc->vec2_R)); 4695 } 4696 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4697 } 4698 /* coarse basis functions */ 4699 for (i=0;i<pcbddc->local_primal_size;i++) { 4700 Vec v; 4701 4702 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4703 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4704 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4705 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4706 if (i<n_vertices) { 4707 PetscScalar one = 1.0; 4708 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4709 PetscCall(VecAssemblyBegin(v)); 4710 PetscCall(VecAssemblyEnd(v)); 4711 } 4712 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4713 4714 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4715 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4716 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4717 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4718 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4719 } 4720 PetscCall(VecResetArray(pcbddc->vec1_R)); 4721 } 4722 PetscCall(MatDestroy(&B_V)); 4723 PetscCall(MatDestroy(&B_C)); 4724 } 4725 4726 /* free memory */ 4727 PetscCall(PetscFree(idx_V_B)); 4728 PetscCall(MatDestroy(&S_VV)); 4729 PetscCall(MatDestroy(&S_CV)); 4730 PetscCall(MatDestroy(&S_VC)); 4731 PetscCall(MatDestroy(&S_CC)); 4732 PetscCall(PetscFree(work)); 4733 if (n_vertices) { 4734 PetscCall(MatDestroy(&A_VR)); 4735 } 4736 if (n_constraints) { 4737 PetscCall(MatDestroy(&C_CR)); 4738 } 4739 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4740 4741 /* Checking coarse_sub_mat and coarse basis functios */ 4742 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4743 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4744 if (pcbddc->dbg_flag) { 4745 Mat coarse_sub_mat; 4746 Mat AUXMAT,TM1,TM2,TM3,TM4; 4747 Mat coarse_phi_D,coarse_phi_B; 4748 Mat coarse_psi_D,coarse_psi_B; 4749 Mat A_II,A_BB,A_IB,A_BI; 4750 Mat C_B,CPHI; 4751 IS is_dummy; 4752 Vec mones; 4753 MatType checkmattype=MATSEQAIJ; 4754 PetscReal real_value; 4755 4756 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4757 Mat A; 4758 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4759 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4760 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4761 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4762 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4763 PetscCall(MatDestroy(&A)); 4764 } else { 4765 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4766 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4767 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4768 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4769 } 4770 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4771 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4772 if (!pcbddc->symmetric_primal) { 4773 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4774 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4775 } 4776 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4777 4778 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4779 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4780 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4781 if (!pcbddc->symmetric_primal) { 4782 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4783 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4784 PetscCall(MatDestroy(&AUXMAT)); 4785 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4786 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4787 PetscCall(MatDestroy(&AUXMAT)); 4788 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4789 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4790 PetscCall(MatDestroy(&AUXMAT)); 4791 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4792 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4793 PetscCall(MatDestroy(&AUXMAT)); 4794 } else { 4795 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4796 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4797 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4798 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4799 PetscCall(MatDestroy(&AUXMAT)); 4800 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4801 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4802 PetscCall(MatDestroy(&AUXMAT)); 4803 } 4804 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4805 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4806 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4807 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4808 if (pcbddc->benign_n) { 4809 Mat B0_B,B0_BPHI; 4810 const PetscScalar *data2; 4811 PetscScalar *data; 4812 PetscInt j; 4813 4814 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4815 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4816 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4817 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4818 PetscCall(MatDenseGetArray(TM1,&data)); 4819 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4820 for (j=0;j<pcbddc->benign_n;j++) { 4821 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4822 for (i=0;i<pcbddc->local_primal_size;i++) { 4823 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4824 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4825 } 4826 } 4827 PetscCall(MatDenseRestoreArray(TM1,&data)); 4828 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4829 PetscCall(MatDestroy(&B0_B)); 4830 PetscCall(ISDestroy(&is_dummy)); 4831 PetscCall(MatDestroy(&B0_BPHI)); 4832 } 4833 #if 0 4834 { 4835 PetscViewer viewer; 4836 char filename[256]; 4837 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4838 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4839 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4840 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4841 PetscCall(MatView(coarse_sub_mat,viewer)); 4842 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4843 PetscCall(MatView(TM1,viewer)); 4844 if (pcbddc->coarse_phi_B) { 4845 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4846 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4847 } 4848 if (pcbddc->coarse_phi_D) { 4849 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4850 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4851 } 4852 if (pcbddc->coarse_psi_B) { 4853 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4854 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4855 } 4856 if (pcbddc->coarse_psi_D) { 4857 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4858 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4859 } 4860 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4861 PetscCall(MatView(pcbddc->local_mat,viewer)); 4862 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4863 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4864 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4865 PetscCall(ISView(pcis->is_I_local,viewer)); 4866 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4867 PetscCall(ISView(pcis->is_B_local,viewer)); 4868 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4869 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4870 PetscCall(PetscViewerDestroy(&viewer)); 4871 } 4872 #endif 4873 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4874 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4875 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4876 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4877 4878 /* check constraints */ 4879 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4880 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4881 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4882 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4883 } else { 4884 PetscScalar *data; 4885 Mat tmat; 4886 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4887 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4888 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4889 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4890 PetscCall(MatDestroy(&tmat)); 4891 } 4892 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4893 PetscCall(VecSet(mones,-1.0)); 4894 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4895 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4896 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4897 if (!pcbddc->symmetric_primal) { 4898 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4899 PetscCall(VecSet(mones,-1.0)); 4900 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4901 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4902 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,(double)real_value)); 4903 } 4904 PetscCall(MatDestroy(&C_B)); 4905 PetscCall(MatDestroy(&CPHI)); 4906 PetscCall(ISDestroy(&is_dummy)); 4907 PetscCall(VecDestroy(&mones)); 4908 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4909 PetscCall(MatDestroy(&A_II)); 4910 PetscCall(MatDestroy(&A_BB)); 4911 PetscCall(MatDestroy(&A_IB)); 4912 PetscCall(MatDestroy(&A_BI)); 4913 PetscCall(MatDestroy(&TM1)); 4914 PetscCall(MatDestroy(&TM2)); 4915 PetscCall(MatDestroy(&TM3)); 4916 PetscCall(MatDestroy(&TM4)); 4917 PetscCall(MatDestroy(&coarse_phi_D)); 4918 PetscCall(MatDestroy(&coarse_phi_B)); 4919 if (!pcbddc->symmetric_primal) { 4920 PetscCall(MatDestroy(&coarse_psi_D)); 4921 PetscCall(MatDestroy(&coarse_psi_B)); 4922 } 4923 PetscCall(MatDestroy(&coarse_sub_mat)); 4924 } 4925 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4926 { 4927 PetscBool gpu; 4928 4929 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4930 if (gpu) { 4931 if (pcbddc->local_auxmat1) { 4932 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4933 } 4934 if (pcbddc->local_auxmat2) { 4935 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4936 } 4937 if (pcbddc->coarse_phi_B) { 4938 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4939 } 4940 if (pcbddc->coarse_phi_D) { 4941 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4942 } 4943 if (pcbddc->coarse_psi_B) { 4944 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4945 } 4946 if (pcbddc->coarse_psi_D) { 4947 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4948 } 4949 } 4950 } 4951 /* get back data */ 4952 *coarse_submat_vals_n = coarse_submat_vals; 4953 PetscFunctionReturn(0); 4954 } 4955 4956 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4957 { 4958 Mat *work_mat; 4959 IS isrow_s,iscol_s; 4960 PetscBool rsorted,csorted; 4961 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4962 4963 PetscFunctionBegin; 4964 PetscCall(ISSorted(isrow,&rsorted)); 4965 PetscCall(ISSorted(iscol,&csorted)); 4966 PetscCall(ISGetLocalSize(isrow,&rsize)); 4967 PetscCall(ISGetLocalSize(iscol,&csize)); 4968 4969 if (!rsorted) { 4970 const PetscInt *idxs; 4971 PetscInt *idxs_sorted,i; 4972 4973 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4974 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4975 for (i=0;i<rsize;i++) { 4976 idxs_perm_r[i] = i; 4977 } 4978 PetscCall(ISGetIndices(isrow,&idxs)); 4979 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4980 for (i=0;i<rsize;i++) { 4981 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4982 } 4983 PetscCall(ISRestoreIndices(isrow,&idxs)); 4984 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4985 } else { 4986 PetscCall(PetscObjectReference((PetscObject)isrow)); 4987 isrow_s = isrow; 4988 } 4989 4990 if (!csorted) { 4991 if (isrow == iscol) { 4992 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4993 iscol_s = isrow_s; 4994 } else { 4995 const PetscInt *idxs; 4996 PetscInt *idxs_sorted,i; 4997 4998 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4999 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 5000 for (i=0;i<csize;i++) { 5001 idxs_perm_c[i] = i; 5002 } 5003 PetscCall(ISGetIndices(iscol,&idxs)); 5004 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 5005 for (i=0;i<csize;i++) { 5006 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5007 } 5008 PetscCall(ISRestoreIndices(iscol,&idxs)); 5009 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 5010 } 5011 } else { 5012 PetscCall(PetscObjectReference((PetscObject)iscol)); 5013 iscol_s = iscol; 5014 } 5015 5016 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 5017 5018 if (!rsorted || !csorted) { 5019 Mat new_mat; 5020 IS is_perm_r,is_perm_c; 5021 5022 if (!rsorted) { 5023 PetscInt *idxs_r,i; 5024 PetscCall(PetscMalloc1(rsize,&idxs_r)); 5025 for (i=0;i<rsize;i++) { 5026 idxs_r[idxs_perm_r[i]] = i; 5027 } 5028 PetscCall(PetscFree(idxs_perm_r)); 5029 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 5030 } else { 5031 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 5032 } 5033 PetscCall(ISSetPermutation(is_perm_r)); 5034 5035 if (!csorted) { 5036 if (isrow_s == iscol_s) { 5037 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5038 is_perm_c = is_perm_r; 5039 } else { 5040 PetscInt *idxs_c,i; 5041 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5042 PetscCall(PetscMalloc1(csize,&idxs_c)); 5043 for (i=0;i<csize;i++) { 5044 idxs_c[idxs_perm_c[i]] = i; 5045 } 5046 PetscCall(PetscFree(idxs_perm_c)); 5047 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5048 } 5049 } else { 5050 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5051 } 5052 PetscCall(ISSetPermutation(is_perm_c)); 5053 5054 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5055 PetscCall(MatDestroy(&work_mat[0])); 5056 work_mat[0] = new_mat; 5057 PetscCall(ISDestroy(&is_perm_r)); 5058 PetscCall(ISDestroy(&is_perm_c)); 5059 } 5060 5061 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5062 *B = work_mat[0]; 5063 PetscCall(MatDestroyMatrices(1,&work_mat)); 5064 PetscCall(ISDestroy(&isrow_s)); 5065 PetscCall(ISDestroy(&iscol_s)); 5066 PetscFunctionReturn(0); 5067 } 5068 5069 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5070 { 5071 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5072 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5073 Mat new_mat,lA; 5074 IS is_local,is_global; 5075 PetscInt local_size; 5076 PetscBool isseqaij; 5077 5078 PetscFunctionBegin; 5079 PetscCall(MatDestroy(&pcbddc->local_mat)); 5080 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5081 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5082 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5083 PetscCall(ISDestroy(&is_local)); 5084 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5085 PetscCall(ISDestroy(&is_global)); 5086 5087 if (pcbddc->dbg_flag) { 5088 Vec x,x_change; 5089 PetscReal error; 5090 5091 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5092 PetscCall(VecSetRandom(x,NULL)); 5093 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5094 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5095 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5096 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5097 if (!pcbddc->change_interior) { 5098 const PetscScalar *x,*y,*v; 5099 PetscReal lerror = 0.; 5100 PetscInt i; 5101 5102 PetscCall(VecGetArrayRead(matis->x,&x)); 5103 PetscCall(VecGetArrayRead(matis->y,&y)); 5104 PetscCall(VecGetArrayRead(matis->counter,&v)); 5105 for (i=0;i<local_size;i++) 5106 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5107 lerror = PetscAbsScalar(x[i]-y[i]); 5108 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5109 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5110 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5111 PetscCall(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5112 if (error > PETSC_SMALL) { 5113 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5114 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",(double)error); 5115 } else { 5116 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",(double)error); 5117 } 5118 } 5119 } 5120 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5121 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5122 PetscCall(VecAXPY(x,-1.0,x_change)); 5123 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5124 if (error > PETSC_SMALL) { 5125 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 5127 } else { 5128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",(double)error); 5129 } 5130 } 5131 PetscCall(VecDestroy(&x)); 5132 PetscCall(VecDestroy(&x_change)); 5133 } 5134 5135 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5136 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5137 5138 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5139 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5140 if (isseqaij) { 5141 PetscCall(MatDestroy(&pcbddc->local_mat)); 5142 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5143 if (lA) { 5144 Mat work; 5145 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5146 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5147 PetscCall(MatDestroy(&work)); 5148 } 5149 } else { 5150 Mat work_mat; 5151 5152 PetscCall(MatDestroy(&pcbddc->local_mat)); 5153 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5154 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5155 PetscCall(MatDestroy(&work_mat)); 5156 if (lA) { 5157 Mat work; 5158 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5159 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5160 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5161 PetscCall(MatDestroy(&work)); 5162 } 5163 } 5164 if (matis->A->symmetric_set) { 5165 PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5166 #if !defined(PETSC_USE_COMPLEX) 5167 PetscCall(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5168 #endif 5169 } 5170 PetscCall(MatDestroy(&new_mat)); 5171 PetscFunctionReturn(0); 5172 } 5173 5174 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5175 { 5176 PC_IS* pcis = (PC_IS*)(pc->data); 5177 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5178 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5179 PetscInt *idx_R_local=NULL; 5180 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5181 PetscInt vbs,bs; 5182 PetscBT bitmask=NULL; 5183 5184 PetscFunctionBegin; 5185 /* 5186 No need to setup local scatters if 5187 - primal space is unchanged 5188 AND 5189 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5190 AND 5191 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5192 */ 5193 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5194 PetscFunctionReturn(0); 5195 } 5196 /* destroy old objects */ 5197 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5198 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5199 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5200 /* Set Non-overlapping dimensions */ 5201 n_B = pcis->n_B; 5202 n_D = pcis->n - n_B; 5203 n_vertices = pcbddc->n_vertices; 5204 5205 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5206 5207 /* create auxiliary bitmask and allocate workspace */ 5208 if (!sub_schurs || !sub_schurs->reuse_solver) { 5209 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5210 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5211 for (i=0;i<n_vertices;i++) { 5212 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5213 } 5214 5215 for (i=0, n_R=0; i<pcis->n; i++) { 5216 if (!PetscBTLookup(bitmask,i)) { 5217 idx_R_local[n_R++] = i; 5218 } 5219 } 5220 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5221 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5222 5223 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5224 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5225 } 5226 5227 /* Block code */ 5228 vbs = 1; 5229 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5230 if (bs>1 && !(n_vertices%bs)) { 5231 PetscBool is_blocked = PETSC_TRUE; 5232 PetscInt *vary; 5233 if (!sub_schurs || !sub_schurs->reuse_solver) { 5234 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5235 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5236 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5237 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5238 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5239 for (i=0; i<pcis->n/bs; i++) { 5240 if (vary[i]!=0 && vary[i]!=bs) { 5241 is_blocked = PETSC_FALSE; 5242 break; 5243 } 5244 } 5245 PetscCall(PetscFree(vary)); 5246 } else { 5247 /* Verify directly the R set */ 5248 for (i=0; i<n_R/bs; i++) { 5249 PetscInt j,node=idx_R_local[bs*i]; 5250 for (j=1; j<bs; j++) { 5251 if (node != idx_R_local[bs*i+j]-j) { 5252 is_blocked = PETSC_FALSE; 5253 break; 5254 } 5255 } 5256 } 5257 } 5258 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5259 vbs = bs; 5260 for (i=0;i<n_R/vbs;i++) { 5261 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5262 } 5263 } 5264 } 5265 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5266 if (sub_schurs && sub_schurs->reuse_solver) { 5267 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5268 5269 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5270 PetscCall(ISDestroy(&reuse_solver->is_R)); 5271 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5272 reuse_solver->is_R = pcbddc->is_R_local; 5273 } else { 5274 PetscCall(PetscFree(idx_R_local)); 5275 } 5276 5277 /* print some info if requested */ 5278 if (pcbddc->dbg_flag) { 5279 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5280 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5281 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5282 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5283 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n",pcis->n,n_D,n_B)); 5284 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5285 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5286 } 5287 5288 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5289 if (!sub_schurs || !sub_schurs->reuse_solver) { 5290 IS is_aux1,is_aux2; 5291 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5292 5293 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5294 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5295 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5296 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5297 for (i=0; i<n_D; i++) { 5298 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5299 } 5300 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5301 for (i=0, j=0; i<n_R; i++) { 5302 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5303 aux_array1[j++] = i; 5304 } 5305 } 5306 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5307 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5308 for (i=0, j=0; i<n_B; i++) { 5309 if (!PetscBTLookup(bitmask,is_indices[i])) { 5310 aux_array2[j++] = i; 5311 } 5312 } 5313 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5314 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5315 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5316 PetscCall(ISDestroy(&is_aux1)); 5317 PetscCall(ISDestroy(&is_aux2)); 5318 5319 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5320 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5321 for (i=0, j=0; i<n_R; i++) { 5322 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5323 aux_array1[j++] = i; 5324 } 5325 } 5326 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5327 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5328 PetscCall(ISDestroy(&is_aux1)); 5329 } 5330 PetscCall(PetscBTDestroy(&bitmask)); 5331 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5332 } else { 5333 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5334 IS tis; 5335 PetscInt schur_size; 5336 5337 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5338 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5339 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5340 PetscCall(ISDestroy(&tis)); 5341 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5342 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5343 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5344 PetscCall(ISDestroy(&tis)); 5345 } 5346 } 5347 PetscFunctionReturn(0); 5348 } 5349 5350 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5351 { 5352 MatNullSpace NullSpace; 5353 Mat dmat; 5354 const Vec *nullvecs; 5355 Vec v,v2,*nullvecs2; 5356 VecScatter sct = NULL; 5357 PetscContainer c; 5358 PetscScalar *ddata; 5359 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5360 PetscBool nnsp_has_cnst; 5361 5362 PetscFunctionBegin; 5363 if (!is && !B) { /* MATIS */ 5364 Mat_IS* matis = (Mat_IS*)A->data; 5365 5366 if (!B) { 5367 PetscCall(MatISGetLocalMat(A,&B)); 5368 } 5369 sct = matis->cctx; 5370 PetscCall(PetscObjectReference((PetscObject)sct)); 5371 } else { 5372 PetscCall(MatGetNullSpace(B,&NullSpace)); 5373 if (!NullSpace) { 5374 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5375 } 5376 if (NullSpace) PetscFunctionReturn(0); 5377 } 5378 PetscCall(MatGetNullSpace(A,&NullSpace)); 5379 if (!NullSpace) { 5380 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5381 } 5382 if (!NullSpace) PetscFunctionReturn(0); 5383 5384 PetscCall(MatCreateVecs(A,&v,NULL)); 5385 PetscCall(MatCreateVecs(B,&v2,NULL)); 5386 if (!sct) { 5387 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5388 } 5389 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5390 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5391 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5392 PetscCall(VecGetBlockSize(v2,&bs)); 5393 PetscCall(VecGetSize(v2,&N)); 5394 PetscCall(VecGetLocalSize(v2,&n)); 5395 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5396 for (k=0;k<nnsp_size;k++) { 5397 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5398 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5399 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5400 } 5401 if (nnsp_has_cnst) { 5402 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5403 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5404 } 5405 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5406 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5407 5408 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5409 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5410 PetscCall(PetscContainerSetPointer(c,ddata)); 5411 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5412 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5413 PetscCall(PetscContainerDestroy(&c)); 5414 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5415 PetscCall(MatDestroy(&dmat)); 5416 5417 for (k=0;k<bsiz;k++) { 5418 PetscCall(VecDestroy(&nullvecs2[k])); 5419 } 5420 PetscCall(PetscFree(nullvecs2)); 5421 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5422 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5423 PetscCall(VecDestroy(&v)); 5424 PetscCall(VecDestroy(&v2)); 5425 PetscCall(VecScatterDestroy(&sct)); 5426 PetscFunctionReturn(0); 5427 } 5428 5429 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5430 { 5431 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5432 PC_IS *pcis = (PC_IS*)pc->data; 5433 PC pc_temp; 5434 Mat A_RR; 5435 MatNullSpace nnsp; 5436 MatReuse reuse; 5437 PetscScalar m_one = -1.0; 5438 PetscReal value; 5439 PetscInt n_D,n_R; 5440 PetscBool issbaij,opts; 5441 void (*f)(void) = NULL; 5442 char dir_prefix[256],neu_prefix[256],str_level[16]; 5443 size_t len; 5444 5445 PetscFunctionBegin; 5446 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5447 /* approximate solver, propagate NearNullSpace if needed */ 5448 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5449 MatNullSpace gnnsp1,gnnsp2; 5450 PetscBool lhas,ghas; 5451 5452 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5453 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5454 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5455 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5456 PetscCall(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5457 if (!ghas && (gnnsp1 || gnnsp2)) { 5458 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5459 } 5460 } 5461 5462 /* compute prefixes */ 5463 PetscCall(PetscStrcpy(dir_prefix,"")); 5464 PetscCall(PetscStrcpy(neu_prefix,"")); 5465 if (!pcbddc->current_level) { 5466 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5467 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5468 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5469 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5470 } else { 5471 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5472 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5473 len -= 15; /* remove "pc_bddc_coarse_" */ 5474 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5475 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5476 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5477 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5478 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5479 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5480 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5481 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5482 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5483 } 5484 5485 /* DIRICHLET PROBLEM */ 5486 if (dirichlet) { 5487 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5488 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5489 PetscCheck(sub_schurs && sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5490 if (pcbddc->dbg_flag) { 5491 Mat A_IIn; 5492 5493 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5494 PetscCall(MatDestroy(&pcis->A_II)); 5495 pcis->A_II = A_IIn; 5496 } 5497 } 5498 if (pcbddc->local_mat->symmetric_set) { 5499 PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5500 } 5501 /* Matrix for Dirichlet problem is pcis->A_II */ 5502 n_D = pcis->n - pcis->n_B; 5503 opts = PETSC_FALSE; 5504 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5505 opts = PETSC_TRUE; 5506 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5507 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5508 /* default */ 5509 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5510 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5511 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5512 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5513 if (issbaij) { 5514 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5515 } else { 5516 PetscCall(PCSetType(pc_temp,PCLU)); 5517 } 5518 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5519 } 5520 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5521 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5522 /* Allow user's customization */ 5523 if (opts) { 5524 PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5525 } 5526 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5527 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5528 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5529 } 5530 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5531 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5532 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5533 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5534 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5535 const PetscInt *idxs; 5536 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5537 5538 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5539 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5540 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5541 for (i=0;i<nl;i++) { 5542 for (d=0;d<cdim;d++) { 5543 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5544 } 5545 } 5546 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5547 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5548 PetscCall(PetscFree(scoords)); 5549 } 5550 if (sub_schurs && sub_schurs->reuse_solver) { 5551 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5552 5553 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5554 } 5555 5556 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5557 if (!n_D) { 5558 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5559 PetscCall(PCSetType(pc_temp,PCNONE)); 5560 } 5561 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5562 /* set ksp_D into pcis data */ 5563 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5564 PetscCall(KSPDestroy(&pcis->ksp_D)); 5565 pcis->ksp_D = pcbddc->ksp_D; 5566 } 5567 5568 /* NEUMANN PROBLEM */ 5569 A_RR = NULL; 5570 if (neumann) { 5571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5572 PetscInt ibs,mbs; 5573 PetscBool issbaij, reuse_neumann_solver; 5574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5575 5576 reuse_neumann_solver = PETSC_FALSE; 5577 if (sub_schurs && sub_schurs->reuse_solver) { 5578 IS iP; 5579 5580 reuse_neumann_solver = PETSC_TRUE; 5581 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5582 if (iP) reuse_neumann_solver = PETSC_FALSE; 5583 } 5584 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5585 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5586 if (pcbddc->ksp_R) { /* already created ksp */ 5587 PetscInt nn_R; 5588 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5589 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5590 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5591 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5592 PetscCall(KSPReset(pcbddc->ksp_R)); 5593 PetscCall(MatDestroy(&A_RR)); 5594 reuse = MAT_INITIAL_MATRIX; 5595 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5596 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5597 PetscCall(MatDestroy(&A_RR)); 5598 reuse = MAT_INITIAL_MATRIX; 5599 } else { /* safe to reuse the matrix */ 5600 reuse = MAT_REUSE_MATRIX; 5601 } 5602 } 5603 /* last check */ 5604 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5605 PetscCall(MatDestroy(&A_RR)); 5606 reuse = MAT_INITIAL_MATRIX; 5607 } 5608 } else { /* first time, so we need to create the matrix */ 5609 reuse = MAT_INITIAL_MATRIX; 5610 } 5611 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5612 TODO: Get Rid of these conversions */ 5613 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5614 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5615 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5616 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5617 if (matis->A == pcbddc->local_mat) { 5618 PetscCall(MatDestroy(&pcbddc->local_mat)); 5619 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5620 } else { 5621 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5622 } 5623 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5624 if (matis->A == pcbddc->local_mat) { 5625 PetscCall(MatDestroy(&pcbddc->local_mat)); 5626 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5627 } else { 5628 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5629 } 5630 } 5631 /* extract A_RR */ 5632 if (reuse_neumann_solver) { 5633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5634 5635 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5636 PetscCall(MatDestroy(&A_RR)); 5637 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5638 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5639 } else { 5640 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5641 } 5642 } else { 5643 PetscCall(MatDestroy(&A_RR)); 5644 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5645 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5646 } 5647 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5648 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5649 } 5650 if (pcbddc->local_mat->symmetric_set) { 5651 PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5652 } 5653 opts = PETSC_FALSE; 5654 if (!pcbddc->ksp_R) { /* create object if not present */ 5655 opts = PETSC_TRUE; 5656 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5657 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5658 /* default */ 5659 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5660 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5661 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5662 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5663 if (issbaij) { 5664 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5665 } else { 5666 PetscCall(PCSetType(pc_temp,PCLU)); 5667 } 5668 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5669 } 5670 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5671 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5672 if (opts) { /* Allow user's customization once */ 5673 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5674 } 5675 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5676 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5677 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5678 } 5679 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5680 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5681 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5682 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5683 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5684 const PetscInt *idxs; 5685 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5686 5687 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5688 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5689 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5690 for (i=0;i<nl;i++) { 5691 for (d=0;d<cdim;d++) { 5692 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5693 } 5694 } 5695 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5696 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5697 PetscCall(PetscFree(scoords)); 5698 } 5699 5700 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5701 if (!n_R) { 5702 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5703 PetscCall(PCSetType(pc_temp,PCNONE)); 5704 } 5705 /* Reuse solver if it is present */ 5706 if (reuse_neumann_solver) { 5707 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5708 5709 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5710 } 5711 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5712 } 5713 5714 if (pcbddc->dbg_flag) { 5715 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5716 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5717 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5718 } 5719 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5720 5721 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5722 if (pcbddc->NullSpace_corr[0]) { 5723 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5724 } 5725 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5726 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5727 } 5728 if (neumann && pcbddc->NullSpace_corr[2]) { 5729 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5730 } 5731 /* check Dirichlet and Neumann solvers */ 5732 if (pcbddc->dbg_flag) { 5733 if (dirichlet) { /* Dirichlet */ 5734 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5735 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5736 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5737 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5738 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5739 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5740 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,(double)value)); 5741 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5742 } 5743 if (neumann) { /* Neumann */ 5744 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5745 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5746 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5747 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5748 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5749 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5750 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,(double)value)); 5751 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5752 } 5753 } 5754 /* free Neumann problem's matrix */ 5755 PetscCall(MatDestroy(&A_RR)); 5756 PetscFunctionReturn(0); 5757 } 5758 5759 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5760 { 5761 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5762 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5763 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5764 5765 PetscFunctionBegin; 5766 if (!reuse_solver) { 5767 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5768 } 5769 if (!pcbddc->switch_static) { 5770 if (applytranspose && pcbddc->local_auxmat1) { 5771 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5772 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5773 } 5774 if (!reuse_solver) { 5775 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5776 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5777 } else { 5778 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5779 5780 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5781 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5782 } 5783 } else { 5784 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5785 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5786 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5787 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5788 if (applytranspose && pcbddc->local_auxmat1) { 5789 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5790 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5791 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5792 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5793 } 5794 } 5795 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5796 if (!reuse_solver || pcbddc->switch_static) { 5797 if (applytranspose) { 5798 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5799 } else { 5800 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5801 } 5802 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5803 } else { 5804 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5805 5806 if (applytranspose) { 5807 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5808 } else { 5809 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5810 } 5811 } 5812 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5813 PetscCall(VecSet(inout_B,0.)); 5814 if (!pcbddc->switch_static) { 5815 if (!reuse_solver) { 5816 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5817 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5818 } else { 5819 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5820 5821 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5822 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5823 } 5824 if (!applytranspose && pcbddc->local_auxmat1) { 5825 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5826 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5827 } 5828 } else { 5829 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5830 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5831 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5832 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5833 if (!applytranspose && pcbddc->local_auxmat1) { 5834 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5835 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5836 } 5837 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5838 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5839 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5840 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5841 } 5842 PetscFunctionReturn(0); 5843 } 5844 5845 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5846 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5847 { 5848 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5849 PC_IS* pcis = (PC_IS*) (pc->data); 5850 const PetscScalar zero = 0.0; 5851 5852 PetscFunctionBegin; 5853 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5854 if (!pcbddc->benign_apply_coarse_only) { 5855 if (applytranspose) { 5856 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5857 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5858 } else { 5859 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5860 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5861 } 5862 } else { 5863 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5864 } 5865 5866 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5867 if (pcbddc->benign_n) { 5868 PetscScalar *array; 5869 PetscInt j; 5870 5871 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5872 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5873 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5874 } 5875 5876 /* start communications from local primal nodes to rhs of coarse solver */ 5877 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5878 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5879 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5880 5881 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5882 if (pcbddc->coarse_ksp) { 5883 Mat coarse_mat; 5884 Vec rhs,sol; 5885 MatNullSpace nullsp; 5886 PetscBool isbddc = PETSC_FALSE; 5887 5888 if (pcbddc->benign_have_null) { 5889 PC coarse_pc; 5890 5891 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5892 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5893 /* we need to propagate to coarser levels the need for a possible benign correction */ 5894 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5895 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5896 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5897 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5898 } 5899 } 5900 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5901 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5902 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5903 if (applytranspose) { 5904 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5905 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5906 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5907 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5908 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5909 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5910 if (nullsp) { 5911 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5912 } 5913 } else { 5914 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5915 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5916 PC coarse_pc; 5917 5918 if (nullsp) { 5919 PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5920 } 5921 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5922 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5923 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5924 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5925 } else { 5926 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5927 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5928 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5929 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5930 if (nullsp) { 5931 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5932 } 5933 } 5934 } 5935 /* we don't need the benign correction at coarser levels anymore */ 5936 if (pcbddc->benign_have_null && isbddc) { 5937 PC coarse_pc; 5938 PC_BDDC* coarsepcbddc; 5939 5940 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5941 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5942 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5943 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5944 } 5945 } 5946 5947 /* Local solution on R nodes */ 5948 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5949 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5950 } 5951 /* communications from coarse sol to local primal nodes */ 5952 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5953 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5954 5955 /* Sum contributions from the two levels */ 5956 if (!pcbddc->benign_apply_coarse_only) { 5957 if (applytranspose) { 5958 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5959 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5960 } else { 5961 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5962 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5963 } 5964 /* store p0 */ 5965 if (pcbddc->benign_n) { 5966 PetscScalar *array; 5967 PetscInt j; 5968 5969 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5970 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5971 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5972 } 5973 } else { /* expand the coarse solution */ 5974 if (applytranspose) { 5975 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5976 } else { 5977 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5978 } 5979 } 5980 PetscFunctionReturn(0); 5981 } 5982 5983 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5984 { 5985 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5986 Vec from,to; 5987 const PetscScalar *array; 5988 5989 PetscFunctionBegin; 5990 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5991 from = pcbddc->coarse_vec; 5992 to = pcbddc->vec1_P; 5993 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5994 Vec tvec; 5995 5996 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5997 PetscCall(VecResetArray(tvec)); 5998 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5999 PetscCall(VecGetArrayRead(tvec,&array)); 6000 PetscCall(VecPlaceArray(from,array)); 6001 PetscCall(VecRestoreArrayRead(tvec,&array)); 6002 } 6003 } else { /* from local to global -> put data in coarse right hand side */ 6004 from = pcbddc->vec1_P; 6005 to = pcbddc->coarse_vec; 6006 } 6007 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 6008 PetscFunctionReturn(0); 6009 } 6010 6011 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6012 { 6013 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6014 Vec from,to; 6015 const PetscScalar *array; 6016 6017 PetscFunctionBegin; 6018 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6019 from = pcbddc->coarse_vec; 6020 to = pcbddc->vec1_P; 6021 } else { /* from local to global -> put data in coarse right hand side */ 6022 from = pcbddc->vec1_P; 6023 to = pcbddc->coarse_vec; 6024 } 6025 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 6026 if (smode == SCATTER_FORWARD) { 6027 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6028 Vec tvec; 6029 6030 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 6031 PetscCall(VecGetArrayRead(to,&array)); 6032 PetscCall(VecPlaceArray(tvec,array)); 6033 PetscCall(VecRestoreArrayRead(to,&array)); 6034 } 6035 } else { 6036 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6037 PetscCall(VecResetArray(from)); 6038 } 6039 } 6040 PetscFunctionReturn(0); 6041 } 6042 6043 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6044 { 6045 PC_IS* pcis = (PC_IS*)(pc->data); 6046 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6047 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6048 /* one and zero */ 6049 PetscScalar one=1.0,zero=0.0; 6050 /* space to store constraints and their local indices */ 6051 PetscScalar *constraints_data; 6052 PetscInt *constraints_idxs,*constraints_idxs_B; 6053 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6054 PetscInt *constraints_n; 6055 /* iterators */ 6056 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6057 /* BLAS integers */ 6058 PetscBLASInt lwork,lierr; 6059 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6060 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6061 /* reuse */ 6062 PetscInt olocal_primal_size,olocal_primal_size_cc; 6063 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6064 /* change of basis */ 6065 PetscBool qr_needed; 6066 PetscBT change_basis,qr_needed_idx; 6067 /* auxiliary stuff */ 6068 PetscInt *nnz,*is_indices; 6069 PetscInt ncc; 6070 /* some quantities */ 6071 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6072 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6073 PetscReal tol; /* tolerance for retaining eigenmodes */ 6074 6075 PetscFunctionBegin; 6076 tol = PetscSqrtReal(PETSC_SMALL); 6077 /* Destroy Mat objects computed previously */ 6078 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6079 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6080 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6081 /* save info on constraints from previous setup (if any) */ 6082 olocal_primal_size = pcbddc->local_primal_size; 6083 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6084 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6085 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6086 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6087 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6088 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6089 6090 if (!pcbddc->adaptive_selection) { 6091 IS ISForVertices,*ISForFaces,*ISForEdges; 6092 MatNullSpace nearnullsp; 6093 const Vec *nearnullvecs; 6094 Vec *localnearnullsp; 6095 PetscScalar *array; 6096 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size,o_nf,o_ne; 6097 PetscBool nnsp_has_cnst; 6098 /* LAPACK working arrays for SVD or POD */ 6099 PetscBool skip_lapack,boolforchange; 6100 PetscScalar *work; 6101 PetscReal *singular_vals; 6102 #if defined(PETSC_USE_COMPLEX) 6103 PetscReal *rwork; 6104 #endif 6105 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6106 PetscBLASInt dummy_int=1; 6107 PetscScalar dummy_scalar=1.; 6108 PetscBool use_pod = PETSC_FALSE; 6109 6110 /* MKL SVD with same input gives different results on different processes! */ 6111 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6112 use_pod = PETSC_TRUE; 6113 #endif 6114 /* Get index sets for faces, edges and vertices from graph */ 6115 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6116 o_nf = n_ISForFaces; 6117 o_ne = n_ISForEdges; 6118 n_vertices = 0; 6119 if (ISForVertices) PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6120 /* print some info */ 6121 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6122 6123 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6124 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6125 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6126 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6127 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,n_vertices,pcbddc->use_vertices)); 6128 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6129 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6130 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6131 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6132 } 6133 6134 if (!pcbddc->use_vertices) n_vertices = 0; 6135 if (!pcbddc->use_edges) n_ISForEdges = 0; 6136 if (!pcbddc->use_faces) n_ISForFaces = 0; 6137 6138 /* check if near null space is attached to global mat */ 6139 if (pcbddc->use_nnsp) { 6140 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6141 } else nearnullsp = NULL; 6142 6143 if (nearnullsp) { 6144 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6145 /* remove any stored info */ 6146 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6147 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6148 /* store information for BDDC solver reuse */ 6149 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6150 pcbddc->onearnullspace = nearnullsp; 6151 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6152 for (i=0;i<nnsp_size;i++) { 6153 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6154 } 6155 } else { /* if near null space is not provided BDDC uses constants by default */ 6156 nnsp_size = 0; 6157 nnsp_has_cnst = PETSC_TRUE; 6158 } 6159 /* get max number of constraints on a single cc */ 6160 max_constraints = nnsp_size; 6161 if (nnsp_has_cnst) max_constraints++; 6162 6163 /* 6164 Evaluate maximum storage size needed by the procedure 6165 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6166 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6167 There can be multiple constraints per connected component 6168 */ 6169 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6170 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6171 6172 total_counts = n_ISForFaces+n_ISForEdges; 6173 total_counts *= max_constraints; 6174 total_counts += n_vertices; 6175 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6176 6177 total_counts = 0; 6178 max_size_of_constraint = 0; 6179 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6180 IS used_is; 6181 if (i<n_ISForEdges) { 6182 used_is = ISForEdges[i]; 6183 } else { 6184 used_is = ISForFaces[i-n_ISForEdges]; 6185 } 6186 PetscCall(ISGetSize(used_is,&j)); 6187 total_counts += j; 6188 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6189 } 6190 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6191 6192 /* get local part of global near null space vectors */ 6193 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6194 for (k=0;k<nnsp_size;k++) { 6195 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6196 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6197 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6198 } 6199 6200 /* whether or not to skip lapack calls */ 6201 skip_lapack = PETSC_TRUE; 6202 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6203 6204 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6205 if (!skip_lapack) { 6206 PetscScalar temp_work; 6207 6208 if (use_pod) { 6209 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6210 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6211 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6212 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6213 #if defined(PETSC_USE_COMPLEX) 6214 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6215 #endif 6216 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6217 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6218 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6219 lwork = -1; 6220 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6221 #if !defined(PETSC_USE_COMPLEX) 6222 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6223 #else 6224 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6225 #endif 6226 PetscCall(PetscFPTrapPop()); 6227 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6228 } else { 6229 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6230 /* SVD */ 6231 PetscInt max_n,min_n; 6232 max_n = max_size_of_constraint; 6233 min_n = max_constraints; 6234 if (max_size_of_constraint < max_constraints) { 6235 min_n = max_size_of_constraint; 6236 max_n = max_constraints; 6237 } 6238 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6239 #if defined(PETSC_USE_COMPLEX) 6240 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6241 #endif 6242 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6243 lwork = -1; 6244 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6245 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6246 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6247 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6248 #if !defined(PETSC_USE_COMPLEX) 6249 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6250 #else 6251 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6252 #endif 6253 PetscCall(PetscFPTrapPop()); 6254 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6255 #else 6256 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6257 #endif /* on missing GESVD */ 6258 } 6259 /* Allocate optimal workspace */ 6260 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6261 PetscCall(PetscMalloc1(lwork,&work)); 6262 } 6263 /* Now we can loop on constraining sets */ 6264 total_counts = 0; 6265 constraints_idxs_ptr[0] = 0; 6266 constraints_data_ptr[0] = 0; 6267 /* vertices */ 6268 if (n_vertices) { 6269 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6270 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6271 for (i=0;i<n_vertices;i++) { 6272 constraints_n[total_counts] = 1; 6273 constraints_data[total_counts] = 1.0; 6274 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6275 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6276 total_counts++; 6277 } 6278 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6279 } 6280 6281 /* edges and faces */ 6282 total_counts_cc = total_counts; 6283 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6284 IS used_is; 6285 PetscBool idxs_copied = PETSC_FALSE; 6286 6287 if (ncc<n_ISForEdges) { 6288 used_is = ISForEdges[ncc]; 6289 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6290 } else { 6291 used_is = ISForFaces[ncc-n_ISForEdges]; 6292 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6293 } 6294 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6295 6296 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6297 if (!size_of_constraint) continue; 6298 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6299 /* change of basis should not be performed on local periodic nodes */ 6300 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6301 if (nnsp_has_cnst) { 6302 PetscScalar quad_value; 6303 6304 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6305 idxs_copied = PETSC_TRUE; 6306 6307 if (!pcbddc->use_nnsp_true) { 6308 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6309 } else { 6310 quad_value = 1.0; 6311 } 6312 for (j=0;j<size_of_constraint;j++) { 6313 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6314 } 6315 temp_constraints++; 6316 total_counts++; 6317 } 6318 for (k=0;k<nnsp_size;k++) { 6319 PetscReal real_value; 6320 PetscScalar *ptr_to_data; 6321 6322 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6323 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6324 for (j=0;j<size_of_constraint;j++) { 6325 ptr_to_data[j] = array[is_indices[j]]; 6326 } 6327 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6328 /* check if array is null on the connected component */ 6329 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6330 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6331 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6332 temp_constraints++; 6333 total_counts++; 6334 if (!idxs_copied) { 6335 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6336 idxs_copied = PETSC_TRUE; 6337 } 6338 } 6339 } 6340 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6341 valid_constraints = temp_constraints; 6342 if (!pcbddc->use_nnsp_true && temp_constraints) { 6343 if (temp_constraints == 1) { /* just normalize the constraint */ 6344 PetscScalar norm,*ptr_to_data; 6345 6346 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6347 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6348 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6349 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6350 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6351 } else { /* perform SVD */ 6352 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6353 6354 if (use_pod) { 6355 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6356 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6357 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6358 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6359 from that computed using LAPACKgesvd 6360 -> This is due to a different computation of eigenvectors in LAPACKheev 6361 -> The quality of the POD-computed basis will be the same */ 6362 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6363 /* Store upper triangular part of correlation matrix */ 6364 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6365 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6366 for (j=0;j<temp_constraints;j++) { 6367 for (k=0;k<j+1;k++) { 6368 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6369 } 6370 } 6371 /* compute eigenvalues and eigenvectors of correlation matrix */ 6372 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6373 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6374 #if !defined(PETSC_USE_COMPLEX) 6375 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6376 #else 6377 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6378 #endif 6379 PetscCall(PetscFPTrapPop()); 6380 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6381 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6382 j = 0; 6383 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6384 total_counts = total_counts-j; 6385 valid_constraints = temp_constraints-j; 6386 /* scale and copy POD basis into used quadrature memory */ 6387 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6388 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6389 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6390 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6391 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6392 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6393 if (j<temp_constraints) { 6394 PetscInt ii; 6395 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6396 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6397 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6398 PetscCall(PetscFPTrapPop()); 6399 for (k=0;k<temp_constraints-j;k++) { 6400 for (ii=0;ii<size_of_constraint;ii++) { 6401 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6402 } 6403 } 6404 } 6405 } else { 6406 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6407 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6408 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6409 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6410 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6411 #if !defined(PETSC_USE_COMPLEX) 6412 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6413 #else 6414 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6415 #endif 6416 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6417 PetscCall(PetscFPTrapPop()); 6418 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6419 k = temp_constraints; 6420 if (k > size_of_constraint) k = size_of_constraint; 6421 j = 0; 6422 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6423 valid_constraints = k-j; 6424 total_counts = total_counts-temp_constraints+valid_constraints; 6425 #else 6426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6427 #endif /* on missing GESVD */ 6428 } 6429 } 6430 } 6431 /* update pointers information */ 6432 if (valid_constraints) { 6433 constraints_n[total_counts_cc] = valid_constraints; 6434 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6435 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6436 /* set change_of_basis flag */ 6437 if (boolforchange) { 6438 PetscBTSet(change_basis,total_counts_cc); 6439 } 6440 total_counts_cc++; 6441 } 6442 } 6443 /* free workspace */ 6444 if (!skip_lapack) { 6445 PetscCall(PetscFree(work)); 6446 #if defined(PETSC_USE_COMPLEX) 6447 PetscCall(PetscFree(rwork)); 6448 #endif 6449 PetscCall(PetscFree(singular_vals)); 6450 PetscCall(PetscFree(correlation_mat)); 6451 PetscCall(PetscFree(temp_basis)); 6452 } 6453 for (k=0;k<nnsp_size;k++) { 6454 PetscCall(VecDestroy(&localnearnullsp[k])); 6455 } 6456 PetscCall(PetscFree(localnearnullsp)); 6457 /* free index sets of faces, edges and vertices */ 6458 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,&o_nf,&ISForFaces,&o_ne,&ISForEdges,&ISForVertices)); 6459 } else { 6460 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6461 6462 total_counts = 0; 6463 n_vertices = 0; 6464 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6465 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6466 } 6467 max_constraints = 0; 6468 total_counts_cc = 0; 6469 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6470 total_counts += pcbddc->adaptive_constraints_n[i]; 6471 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6472 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6473 } 6474 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6475 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6476 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6477 constraints_data = pcbddc->adaptive_constraints_data; 6478 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6479 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6480 total_counts_cc = 0; 6481 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6482 if (pcbddc->adaptive_constraints_n[i]) { 6483 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6484 } 6485 } 6486 6487 max_size_of_constraint = 0; 6488 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6489 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6490 /* Change of basis */ 6491 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6492 if (pcbddc->use_change_of_basis) { 6493 for (i=0;i<sub_schurs->n_subs;i++) { 6494 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6495 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6496 } 6497 } 6498 } 6499 } 6500 pcbddc->local_primal_size = total_counts; 6501 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6502 6503 /* map constraints_idxs in boundary numbering */ 6504 if (pcbddc->use_change_of_basis) { 6505 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6506 PetscCheck(i == constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT,constraints_idxs_ptr[total_counts_cc],i); 6507 } 6508 6509 /* Create constraint matrix */ 6510 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6511 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6512 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6513 6514 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6515 /* determine if a QR strategy is needed for change of basis */ 6516 qr_needed = pcbddc->use_qr_single; 6517 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6518 total_primal_vertices=0; 6519 pcbddc->local_primal_size_cc = 0; 6520 for (i=0;i<total_counts_cc;i++) { 6521 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6522 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6523 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6524 pcbddc->local_primal_size_cc += 1; 6525 } else if (PetscBTLookup(change_basis,i)) { 6526 for (k=0;k<constraints_n[i];k++) { 6527 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6528 } 6529 pcbddc->local_primal_size_cc += constraints_n[i]; 6530 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6531 PetscBTSet(qr_needed_idx,i); 6532 qr_needed = PETSC_TRUE; 6533 } 6534 } else { 6535 pcbddc->local_primal_size_cc += 1; 6536 } 6537 } 6538 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6539 pcbddc->n_vertices = total_primal_vertices; 6540 /* permute indices in order to have a sorted set of vertices */ 6541 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6542 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6543 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6544 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6545 6546 /* nonzero structure of constraint matrix */ 6547 /* and get reference dof for local constraints */ 6548 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6549 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6550 6551 j = total_primal_vertices; 6552 total_counts = total_primal_vertices; 6553 cum = total_primal_vertices; 6554 for (i=n_vertices;i<total_counts_cc;i++) { 6555 if (!PetscBTLookup(change_basis,i)) { 6556 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6557 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6558 cum++; 6559 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6560 for (k=0;k<constraints_n[i];k++) { 6561 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6562 nnz[j+k] = size_of_constraint; 6563 } 6564 j += constraints_n[i]; 6565 } 6566 } 6567 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6568 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6569 PetscCall(PetscFree(nnz)); 6570 6571 /* set values in constraint matrix */ 6572 for (i=0;i<total_primal_vertices;i++) { 6573 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6574 } 6575 total_counts = total_primal_vertices; 6576 for (i=n_vertices;i<total_counts_cc;i++) { 6577 if (!PetscBTLookup(change_basis,i)) { 6578 PetscInt *cols; 6579 6580 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6581 cols = constraints_idxs+constraints_idxs_ptr[i]; 6582 for (k=0;k<constraints_n[i];k++) { 6583 PetscInt row = total_counts+k; 6584 PetscScalar *vals; 6585 6586 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6587 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6588 } 6589 total_counts += constraints_n[i]; 6590 } 6591 } 6592 /* assembling */ 6593 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6594 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6595 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6596 6597 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6598 if (pcbddc->use_change_of_basis) { 6599 /* dual and primal dofs on a single cc */ 6600 PetscInt dual_dofs,primal_dofs; 6601 /* working stuff for GEQRF */ 6602 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6603 PetscBLASInt lqr_work; 6604 /* working stuff for UNGQR */ 6605 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6606 PetscBLASInt lgqr_work; 6607 /* working stuff for TRTRS */ 6608 PetscScalar *trs_rhs = NULL; 6609 PetscBLASInt Blas_NRHS; 6610 /* pointers for values insertion into change of basis matrix */ 6611 PetscInt *start_rows,*start_cols; 6612 PetscScalar *start_vals; 6613 /* working stuff for values insertion */ 6614 PetscBT is_primal; 6615 PetscInt *aux_primal_numbering_B; 6616 /* matrix sizes */ 6617 PetscInt global_size,local_size; 6618 /* temporary change of basis */ 6619 Mat localChangeOfBasisMatrix; 6620 /* extra space for debugging */ 6621 PetscScalar *dbg_work = NULL; 6622 6623 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6624 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6625 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6626 /* nonzeros for local mat */ 6627 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6628 if (!pcbddc->benign_change || pcbddc->fake_change) { 6629 for (i=0;i<pcis->n;i++) nnz[i]=1; 6630 } else { 6631 const PetscInt *ii; 6632 PetscInt n; 6633 PetscBool flg_row; 6634 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6635 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6636 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6637 } 6638 for (i=n_vertices;i<total_counts_cc;i++) { 6639 if (PetscBTLookup(change_basis,i)) { 6640 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6641 if (PetscBTLookup(qr_needed_idx,i)) { 6642 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6643 } else { 6644 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6645 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6646 } 6647 } 6648 } 6649 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6650 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6651 PetscCall(PetscFree(nnz)); 6652 /* Set interior change in the matrix */ 6653 if (!pcbddc->benign_change || pcbddc->fake_change) { 6654 for (i=0;i<pcis->n;i++) { 6655 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6656 } 6657 } else { 6658 const PetscInt *ii,*jj; 6659 PetscScalar *aa; 6660 PetscInt n; 6661 PetscBool flg_row; 6662 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6663 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6664 for (i=0;i<n;i++) { 6665 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6666 } 6667 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6668 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6669 } 6670 6671 if (pcbddc->dbg_flag) { 6672 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6673 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6674 } 6675 6676 /* Now we loop on the constraints which need a change of basis */ 6677 /* 6678 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6679 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6680 6681 Basic blocks of change of basis matrix T computed: 6682 6683 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6684 6685 | 1 0 ... 0 s_1/S | 6686 | 0 1 ... 0 s_2/S | 6687 | ... | 6688 | 0 ... 1 s_{n-1}/S | 6689 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6690 6691 with S = \sum_{i=1}^n s_i^2 6692 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6693 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6694 6695 - QR decomposition of constraints otherwise 6696 */ 6697 if (qr_needed && max_size_of_constraint) { 6698 /* space to store Q */ 6699 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6700 /* array to store scaling factors for reflectors */ 6701 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6702 /* first we issue queries for optimal work */ 6703 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6704 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6705 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6706 lqr_work = -1; 6707 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6708 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6709 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6710 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6711 lgqr_work = -1; 6712 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6713 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6714 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6715 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6716 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6717 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6718 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6719 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6720 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6721 /* array to store rhs and solution of triangular solver */ 6722 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6723 /* allocating workspace for check */ 6724 if (pcbddc->dbg_flag) { 6725 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6726 } 6727 } 6728 /* array to store whether a node is primal or not */ 6729 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6730 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6731 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6732 PetscCheck(i == total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT,total_primal_vertices,i); 6733 for (i=0;i<total_primal_vertices;i++) { 6734 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6735 } 6736 PetscCall(PetscFree(aux_primal_numbering_B)); 6737 6738 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6739 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6740 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6741 if (PetscBTLookup(change_basis,total_counts)) { 6742 /* get constraint info */ 6743 primal_dofs = constraints_n[total_counts]; 6744 dual_dofs = size_of_constraint-primal_dofs; 6745 6746 if (pcbddc->dbg_flag) { 6747 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n",total_counts,primal_dofs,size_of_constraint)); 6748 } 6749 6750 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6751 6752 /* copy quadrature constraints for change of basis check */ 6753 if (pcbddc->dbg_flag) { 6754 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6755 } 6756 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6757 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6758 6759 /* compute QR decomposition of constraints */ 6760 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6761 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6762 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6763 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6764 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6765 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6766 PetscCall(PetscFPTrapPop()); 6767 6768 /* explicitly compute R^-T */ 6769 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6770 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6771 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6772 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6773 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6774 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6775 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6776 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6777 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6778 PetscCall(PetscFPTrapPop()); 6779 6780 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6781 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6782 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6783 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6784 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6785 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6786 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6787 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6788 PetscCall(PetscFPTrapPop()); 6789 6790 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6791 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6792 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6793 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6794 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6795 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6796 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6797 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6798 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6799 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6800 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6801 PetscCall(PetscFPTrapPop()); 6802 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6803 6804 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6805 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6806 /* insert cols for primal dofs */ 6807 for (j=0;j<primal_dofs;j++) { 6808 start_vals = &qr_basis[j*size_of_constraint]; 6809 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6810 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6811 } 6812 /* insert cols for dual dofs */ 6813 for (j=0,k=0;j<dual_dofs;k++) { 6814 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6815 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6816 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6817 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6818 j++; 6819 } 6820 } 6821 6822 /* check change of basis */ 6823 if (pcbddc->dbg_flag) { 6824 PetscInt ii,jj; 6825 PetscBool valid_qr=PETSC_TRUE; 6826 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6827 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6828 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6829 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6830 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6831 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6832 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6833 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6834 PetscCall(PetscFPTrapPop()); 6835 for (jj=0;jj<size_of_constraint;jj++) { 6836 for (ii=0;ii<primal_dofs;ii++) { 6837 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6838 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6839 } 6840 } 6841 if (!valid_qr) { 6842 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6843 for (jj=0;jj<size_of_constraint;jj++) { 6844 for (ii=0;ii<primal_dofs;ii++) { 6845 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6846 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6847 } 6848 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6849 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n",jj,ii,(double)PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6850 } 6851 } 6852 } 6853 } else { 6854 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6855 } 6856 } 6857 } else { /* simple transformation block */ 6858 PetscInt row,col; 6859 PetscScalar val,norm; 6860 6861 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6862 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6863 for (j=0;j<size_of_constraint;j++) { 6864 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6865 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6866 if (!PetscBTLookup(is_primal,row_B)) { 6867 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6868 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6869 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6870 } else { 6871 for (k=0;k<size_of_constraint;k++) { 6872 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6873 if (row != col) { 6874 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6875 } else { 6876 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6877 } 6878 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6879 } 6880 } 6881 } 6882 if (pcbddc->dbg_flag) { 6883 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6884 } 6885 } 6886 } else { 6887 if (pcbddc->dbg_flag) { 6888 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n",total_counts,size_of_constraint)); 6889 } 6890 } 6891 } 6892 6893 /* free workspace */ 6894 if (qr_needed) { 6895 if (pcbddc->dbg_flag) { 6896 PetscCall(PetscFree(dbg_work)); 6897 } 6898 PetscCall(PetscFree(trs_rhs)); 6899 PetscCall(PetscFree(qr_tau)); 6900 PetscCall(PetscFree(qr_work)); 6901 PetscCall(PetscFree(gqr_work)); 6902 PetscCall(PetscFree(qr_basis)); 6903 } 6904 PetscCall(PetscBTDestroy(&is_primal)); 6905 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6906 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6907 6908 /* assembling of global change of variable */ 6909 if (!pcbddc->fake_change) { 6910 Mat tmat; 6911 PetscInt bs; 6912 6913 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6914 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6915 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6916 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6917 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6918 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6919 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6920 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6921 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6922 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6923 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6924 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6925 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6926 PetscCall(MatDestroy(&tmat)); 6927 PetscCall(VecSet(pcis->vec1_global,0.0)); 6928 PetscCall(VecSet(pcis->vec1_N,1.0)); 6929 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6930 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6931 PetscCall(VecReciprocal(pcis->vec1_global)); 6932 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6933 6934 /* check */ 6935 if (pcbddc->dbg_flag) { 6936 PetscReal error; 6937 Vec x,x_change; 6938 6939 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6940 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6941 PetscCall(VecSetRandom(x,NULL)); 6942 PetscCall(VecCopy(x,pcis->vec1_global)); 6943 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6944 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6945 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6946 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6947 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6948 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6949 PetscCall(VecAXPY(x,-1.0,x_change)); 6950 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6951 if (error > PETSC_SMALL) { 6952 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",(double)error); 6953 } 6954 PetscCall(VecDestroy(&x)); 6955 PetscCall(VecDestroy(&x_change)); 6956 } 6957 /* adapt sub_schurs computed (if any) */ 6958 if (pcbddc->use_deluxe_scaling) { 6959 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6960 6961 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6962 if (sub_schurs && sub_schurs->S_Ej_all) { 6963 Mat S_new,tmat; 6964 IS is_all_N,is_V_Sall = NULL; 6965 6966 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6967 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6968 if (pcbddc->deluxe_zerorows) { 6969 ISLocalToGlobalMapping NtoSall; 6970 IS is_V; 6971 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6972 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6973 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6974 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6975 PetscCall(ISDestroy(&is_V)); 6976 } 6977 PetscCall(ISDestroy(&is_all_N)); 6978 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6979 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6980 PetscCall(PetscObjectReference((PetscObject)S_new)); 6981 if (pcbddc->deluxe_zerorows) { 6982 const PetscScalar *array; 6983 const PetscInt *idxs_V,*idxs_all; 6984 PetscInt i,n_V; 6985 6986 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6987 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6988 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6989 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6990 PetscCall(VecGetArrayRead(pcis->D,&array)); 6991 for (i=0;i<n_V;i++) { 6992 PetscScalar val; 6993 PetscInt idx; 6994 6995 idx = idxs_V[i]; 6996 val = array[idxs_all[idxs_V[i]]]; 6997 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6998 } 6999 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 7000 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 7001 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 7002 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 7003 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 7004 } 7005 sub_schurs->S_Ej_all = S_new; 7006 PetscCall(MatDestroy(&S_new)); 7007 if (sub_schurs->sum_S_Ej_all) { 7008 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 7009 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7010 PetscCall(PetscObjectReference((PetscObject)S_new)); 7011 if (pcbddc->deluxe_zerorows) { 7012 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 7013 } 7014 sub_schurs->sum_S_Ej_all = S_new; 7015 PetscCall(MatDestroy(&S_new)); 7016 } 7017 PetscCall(ISDestroy(&is_V_Sall)); 7018 PetscCall(MatDestroy(&tmat)); 7019 } 7020 /* destroy any change of basis context in sub_schurs */ 7021 if (sub_schurs && sub_schurs->change) { 7022 PetscInt i; 7023 7024 for (i=0;i<sub_schurs->n_subs;i++) { 7025 PetscCall(KSPDestroy(&sub_schurs->change[i])); 7026 } 7027 PetscCall(PetscFree(sub_schurs->change)); 7028 } 7029 } 7030 if (pcbddc->switch_static) { /* need to save the local change */ 7031 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7032 } else { 7033 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7034 } 7035 /* determine if any process has changed the pressures locally */ 7036 pcbddc->change_interior = pcbddc->benign_have_null; 7037 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7038 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7039 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7040 pcbddc->use_qr_single = qr_needed; 7041 } 7042 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7043 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7044 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7045 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7046 } else { 7047 Mat benign_global = NULL; 7048 if (pcbddc->benign_have_null) { 7049 Mat M; 7050 7051 pcbddc->change_interior = PETSC_TRUE; 7052 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7053 PetscCall(VecReciprocal(pcis->vec1_N)); 7054 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7055 if (pcbddc->benign_change) { 7056 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7057 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7058 } else { 7059 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7060 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7061 } 7062 PetscCall(MatISSetLocalMat(benign_global,M)); 7063 PetscCall(MatDestroy(&M)); 7064 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7065 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7066 } 7067 if (pcbddc->user_ChangeOfBasisMatrix) { 7068 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7069 PetscCall(MatDestroy(&benign_global)); 7070 } else if (pcbddc->benign_have_null) { 7071 pcbddc->ChangeOfBasisMatrix = benign_global; 7072 } 7073 } 7074 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7075 IS is_global; 7076 const PetscInt *gidxs; 7077 7078 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7079 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7080 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7081 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7082 PetscCall(ISDestroy(&is_global)); 7083 } 7084 } 7085 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7086 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7087 } 7088 7089 if (!pcbddc->fake_change) { 7090 /* add pressure dofs to set of primal nodes for numbering purposes */ 7091 for (i=0;i<pcbddc->benign_n;i++) { 7092 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7093 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7094 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7095 pcbddc->local_primal_size_cc++; 7096 pcbddc->local_primal_size++; 7097 } 7098 7099 /* check if a new primal space has been introduced (also take into account benign trick) */ 7100 pcbddc->new_primal_space_local = PETSC_TRUE; 7101 if (olocal_primal_size == pcbddc->local_primal_size) { 7102 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7103 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7104 if (!pcbddc->new_primal_space_local) { 7105 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7106 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7107 } 7108 } 7109 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7110 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7111 } 7112 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7113 7114 /* flush dbg viewer */ 7115 if (pcbddc->dbg_flag) { 7116 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7117 } 7118 7119 /* free workspace */ 7120 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7121 PetscCall(PetscBTDestroy(&change_basis)); 7122 if (!pcbddc->adaptive_selection) { 7123 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7124 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7125 } else { 7126 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n,pcbddc->adaptive_constraints_idxs_ptr,pcbddc->adaptive_constraints_data_ptr,pcbddc->adaptive_constraints_idxs,pcbddc->adaptive_constraints_data)); 7127 PetscCall(PetscFree(constraints_n)); 7128 PetscCall(PetscFree(constraints_idxs_B)); 7129 } 7130 PetscFunctionReturn(0); 7131 } 7132 7133 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7134 { 7135 ISLocalToGlobalMapping map; 7136 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7137 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7138 PetscInt i,N; 7139 PetscBool rcsr = PETSC_FALSE; 7140 7141 PetscFunctionBegin; 7142 if (pcbddc->recompute_topography) { 7143 pcbddc->graphanalyzed = PETSC_FALSE; 7144 /* Reset previously computed graph */ 7145 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7146 /* Init local Graph struct */ 7147 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7148 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7149 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7150 7151 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7152 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7153 } 7154 /* Check validity of the csr graph passed in by the user */ 7155 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7156 7157 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7158 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7159 PetscInt *xadj,*adjncy; 7160 PetscInt nvtxs; 7161 PetscBool flg_row=PETSC_FALSE; 7162 7163 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7164 if (flg_row) { 7165 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7166 pcbddc->computed_rowadj = PETSC_TRUE; 7167 } 7168 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7169 rcsr = PETSC_TRUE; 7170 } 7171 if (pcbddc->dbg_flag) { 7172 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7173 } 7174 7175 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7176 PetscReal *lcoords; 7177 PetscInt n; 7178 MPI_Datatype dimrealtype; 7179 7180 /* TODO: support for blocked */ 7181 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7182 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7183 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7184 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7185 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7186 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7187 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7188 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7189 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7190 7191 pcbddc->mat_graph->coords = lcoords; 7192 pcbddc->mat_graph->cloc = PETSC_TRUE; 7193 pcbddc->mat_graph->cnloc = n; 7194 } 7195 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT,pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7196 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7197 7198 /* Setup of Graph */ 7199 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7200 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7201 7202 /* attach info on disconnected subdomains if present */ 7203 if (pcbddc->n_local_subs) { 7204 PetscInt *local_subs,n,totn; 7205 7206 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7207 PetscCall(PetscMalloc1(n,&local_subs)); 7208 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7209 for (i=0;i<pcbddc->n_local_subs;i++) { 7210 const PetscInt *idxs; 7211 PetscInt nl,j; 7212 7213 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7214 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7215 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7216 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7217 } 7218 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7219 pcbddc->mat_graph->n_local_subs = totn + 1; 7220 pcbddc->mat_graph->local_subs = local_subs; 7221 } 7222 } 7223 7224 if (!pcbddc->graphanalyzed) { 7225 /* Graph's connected components analysis */ 7226 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7227 pcbddc->graphanalyzed = PETSC_TRUE; 7228 pcbddc->corner_selected = pcbddc->corner_selection; 7229 } 7230 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7231 PetscFunctionReturn(0); 7232 } 7233 7234 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7235 { 7236 PetscInt i,j,n; 7237 PetscScalar *alphas; 7238 PetscReal norm,*onorms; 7239 7240 PetscFunctionBegin; 7241 n = *nio; 7242 if (!n) PetscFunctionReturn(0); 7243 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7244 PetscCall(VecNormalize(vecs[0],&norm)); 7245 if (norm < PETSC_SMALL) { 7246 onorms[0] = 0.0; 7247 PetscCall(VecSet(vecs[0],0.0)); 7248 } else { 7249 onorms[0] = norm; 7250 } 7251 7252 for (i=1;i<n;i++) { 7253 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7254 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7255 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7256 PetscCall(VecNormalize(vecs[i],&norm)); 7257 if (norm < PETSC_SMALL) { 7258 onorms[i] = 0.0; 7259 PetscCall(VecSet(vecs[i],0.0)); 7260 } else { 7261 onorms[i] = norm; 7262 } 7263 } 7264 /* push nonzero vectors at the beginning */ 7265 for (i=0;i<n;i++) { 7266 if (onorms[i] == 0.0) { 7267 for (j=i+1;j<n;j++) { 7268 if (onorms[j] != 0.0) { 7269 PetscCall(VecCopy(vecs[j],vecs[i])); 7270 onorms[j] = 0.0; 7271 } 7272 } 7273 } 7274 } 7275 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7276 PetscCall(PetscFree2(alphas,onorms)); 7277 PetscFunctionReturn(0); 7278 } 7279 7280 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7281 { 7282 ISLocalToGlobalMapping mapping; 7283 Mat A; 7284 PetscInt n_neighs,*neighs,*n_shared,**shared; 7285 PetscMPIInt size,rank,color; 7286 PetscInt *xadj,*adjncy; 7287 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7288 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7289 PetscInt void_procs,*procs_candidates = NULL; 7290 PetscInt xadj_count,*count; 7291 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7292 PetscSubcomm psubcomm; 7293 MPI_Comm subcomm; 7294 7295 PetscFunctionBegin; 7296 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7297 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7298 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7299 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7300 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7301 PetscCheck(*n_subdomains >0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %" PetscInt_FMT,*n_subdomains); 7302 7303 if (have_void) *have_void = PETSC_FALSE; 7304 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7305 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7306 PetscCall(MatISGetLocalMat(mat,&A)); 7307 PetscCall(MatGetLocalSize(A,&n,NULL)); 7308 im_active = !!n; 7309 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7310 void_procs = size - active_procs; 7311 /* get ranks of of non-active processes in mat communicator */ 7312 if (void_procs) { 7313 PetscInt ncand; 7314 7315 if (have_void) *have_void = PETSC_TRUE; 7316 PetscCall(PetscMalloc1(size,&procs_candidates)); 7317 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7318 for (i=0,ncand=0;i<size;i++) { 7319 if (!procs_candidates[i]) { 7320 procs_candidates[ncand++] = i; 7321 } 7322 } 7323 /* force n_subdomains to be not greater that the number of non-active processes */ 7324 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7325 } 7326 7327 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7328 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7329 PetscCall(MatGetSize(mat,&N,NULL)); 7330 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7331 PetscInt issize,isidx,dest; 7332 if (*n_subdomains == 1) dest = 0; 7333 else dest = rank; 7334 if (im_active) { 7335 issize = 1; 7336 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7337 isidx = procs_candidates[dest]; 7338 } else { 7339 isidx = dest; 7340 } 7341 } else { 7342 issize = 0; 7343 isidx = -1; 7344 } 7345 if (*n_subdomains != 1) *n_subdomains = active_procs; 7346 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7347 PetscCall(PetscFree(procs_candidates)); 7348 PetscFunctionReturn(0); 7349 } 7350 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7351 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7352 threshold = PetscMax(threshold,2); 7353 7354 /* Get info on mapping */ 7355 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7356 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7357 7358 /* build local CSR graph of subdomains' connectivity */ 7359 PetscCall(PetscMalloc1(2,&xadj)); 7360 xadj[0] = 0; 7361 xadj[1] = PetscMax(n_neighs-1,0); 7362 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7363 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7364 PetscCall(PetscCalloc1(n,&count)); 7365 for (i=1;i<n_neighs;i++) 7366 for (j=0;j<n_shared[i];j++) 7367 count[shared[i][j]] += 1; 7368 7369 xadj_count = 0; 7370 for (i=1;i<n_neighs;i++) { 7371 for (j=0;j<n_shared[i];j++) { 7372 if (count[shared[i][j]] < threshold) { 7373 adjncy[xadj_count] = neighs[i]; 7374 adjncy_wgt[xadj_count] = n_shared[i]; 7375 xadj_count++; 7376 break; 7377 } 7378 } 7379 } 7380 xadj[1] = xadj_count; 7381 PetscCall(PetscFree(count)); 7382 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7383 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7384 7385 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7386 7387 /* Restrict work on active processes only */ 7388 PetscCall(PetscMPIIntCast(im_active,&color)); 7389 if (void_procs) { 7390 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7391 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7392 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7393 subcomm = PetscSubcommChild(psubcomm); 7394 } else { 7395 psubcomm = NULL; 7396 subcomm = PetscObjectComm((PetscObject)mat); 7397 } 7398 7399 v_wgt = NULL; 7400 if (!color) { 7401 PetscCall(PetscFree(xadj)); 7402 PetscCall(PetscFree(adjncy)); 7403 PetscCall(PetscFree(adjncy_wgt)); 7404 } else { 7405 Mat subdomain_adj; 7406 IS new_ranks,new_ranks_contig; 7407 MatPartitioning partitioner; 7408 PetscInt rstart=0,rend=0; 7409 PetscInt *is_indices,*oldranks; 7410 PetscMPIInt size; 7411 PetscBool aggregate; 7412 7413 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7414 if (void_procs) { 7415 PetscInt prank = rank; 7416 PetscCall(PetscMalloc1(size,&oldranks)); 7417 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7418 for (i=0;i<xadj[1];i++) { 7419 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7420 } 7421 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7422 } else { 7423 oldranks = NULL; 7424 } 7425 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7426 if (aggregate) { /* TODO: all this part could be made more efficient */ 7427 PetscInt lrows,row,ncols,*cols; 7428 PetscMPIInt nrank; 7429 PetscScalar *vals; 7430 7431 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7432 lrows = 0; 7433 if (nrank<redprocs) { 7434 lrows = size/redprocs; 7435 if (nrank<size%redprocs) lrows++; 7436 } 7437 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7438 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7439 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7440 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7441 row = nrank; 7442 ncols = xadj[1]-xadj[0]; 7443 cols = adjncy; 7444 PetscCall(PetscMalloc1(ncols,&vals)); 7445 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7446 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7447 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7448 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7449 PetscCall(PetscFree(xadj)); 7450 PetscCall(PetscFree(adjncy)); 7451 PetscCall(PetscFree(adjncy_wgt)); 7452 PetscCall(PetscFree(vals)); 7453 if (use_vwgt) { 7454 Vec v; 7455 const PetscScalar *array; 7456 PetscInt nl; 7457 7458 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7459 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7460 PetscCall(VecAssemblyBegin(v)); 7461 PetscCall(VecAssemblyEnd(v)); 7462 PetscCall(VecGetLocalSize(v,&nl)); 7463 PetscCall(VecGetArrayRead(v,&array)); 7464 PetscCall(PetscMalloc1(nl,&v_wgt)); 7465 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7466 PetscCall(VecRestoreArrayRead(v,&array)); 7467 PetscCall(VecDestroy(&v)); 7468 } 7469 } else { 7470 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7471 if (use_vwgt) { 7472 PetscCall(PetscMalloc1(1,&v_wgt)); 7473 v_wgt[0] = n; 7474 } 7475 } 7476 /* PetscCall(MatView(subdomain_adj,0)); */ 7477 7478 /* Partition */ 7479 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7480 #if defined(PETSC_HAVE_PTSCOTCH) 7481 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7482 #elif defined(PETSC_HAVE_PARMETIS) 7483 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7484 #else 7485 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7486 #endif 7487 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7488 if (v_wgt) { 7489 PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7490 } 7491 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7492 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7493 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7494 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7495 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7496 7497 /* renumber new_ranks to avoid "holes" in new set of processors */ 7498 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7499 PetscCall(ISDestroy(&new_ranks)); 7500 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7501 if (!aggregate) { 7502 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7503 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7504 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7505 } else if (oldranks) { 7506 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7507 } else { 7508 ranks_send_to_idx[0] = is_indices[0]; 7509 } 7510 } else { 7511 PetscInt idx = 0; 7512 PetscMPIInt tag; 7513 MPI_Request *reqs; 7514 7515 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7516 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7517 for (i=rstart;i<rend;i++) { 7518 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7519 } 7520 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7521 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7522 PetscCall(PetscFree(reqs)); 7523 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7524 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7525 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7526 } else if (oldranks) { 7527 ranks_send_to_idx[0] = oldranks[idx]; 7528 } else { 7529 ranks_send_to_idx[0] = idx; 7530 } 7531 } 7532 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7533 /* clean up */ 7534 PetscCall(PetscFree(oldranks)); 7535 PetscCall(ISDestroy(&new_ranks_contig)); 7536 PetscCall(MatDestroy(&subdomain_adj)); 7537 PetscCall(MatPartitioningDestroy(&partitioner)); 7538 } 7539 PetscCall(PetscSubcommDestroy(&psubcomm)); 7540 PetscCall(PetscFree(procs_candidates)); 7541 7542 /* assemble parallel IS for sends */ 7543 i = 1; 7544 if (!color) i=0; 7545 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7546 PetscFunctionReturn(0); 7547 } 7548 7549 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7550 7551 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7552 { 7553 Mat local_mat; 7554 IS is_sends_internal; 7555 PetscInt rows,cols,new_local_rows; 7556 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7557 PetscBool ismatis,isdense,newisdense,destroy_mat; 7558 ISLocalToGlobalMapping l2gmap; 7559 PetscInt* l2gmap_indices; 7560 const PetscInt* is_indices; 7561 MatType new_local_type; 7562 /* buffers */ 7563 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7564 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7565 PetscInt *recv_buffer_idxs_local; 7566 PetscScalar *ptr_vals,*recv_buffer_vals; 7567 const PetscScalar *send_buffer_vals; 7568 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7569 /* MPI */ 7570 MPI_Comm comm,comm_n; 7571 PetscSubcomm subcomm; 7572 PetscMPIInt n_sends,n_recvs,size; 7573 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7574 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7575 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7576 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7577 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7578 7579 PetscFunctionBegin; 7580 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7581 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7582 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7583 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7584 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7585 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7586 PetscValidLogicalCollectiveBool(mat,reuse,6); 7587 PetscValidLogicalCollectiveInt(mat,nis,8); 7588 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7589 if (nvecs) { 7590 PetscCheck(nvecs <= 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7591 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7592 } 7593 /* further checks */ 7594 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7595 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7596 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7597 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7598 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7599 if (reuse && *mat_n) { 7600 PetscInt mrows,mcols,mnrows,mncols; 7601 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7602 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7603 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7604 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7605 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7606 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT,mrows,mnrows); 7607 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT,mcols,mncols); 7608 } 7609 PetscCall(MatGetBlockSize(local_mat,&bs)); 7610 PetscValidLogicalCollectiveInt(mat,bs,1); 7611 7612 /* prepare IS for sending if not provided */ 7613 if (!is_sends) { 7614 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7615 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7616 } else { 7617 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7618 is_sends_internal = is_sends; 7619 } 7620 7621 /* get comm */ 7622 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7623 7624 /* compute number of sends */ 7625 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7626 PetscCall(PetscMPIIntCast(i,&n_sends)); 7627 7628 /* compute number of receives */ 7629 PetscCallMPI(MPI_Comm_size(comm,&size)); 7630 PetscCall(PetscMalloc1(size,&iflags)); 7631 PetscCall(PetscArrayzero(iflags,size)); 7632 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7633 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7634 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7635 PetscCall(PetscFree(iflags)); 7636 7637 /* restrict comm if requested */ 7638 subcomm = NULL; 7639 destroy_mat = PETSC_FALSE; 7640 if (restrict_comm) { 7641 PetscMPIInt color,subcommsize; 7642 7643 color = 0; 7644 if (restrict_full) { 7645 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7646 } else { 7647 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7648 } 7649 PetscCall(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7650 subcommsize = size - subcommsize; 7651 /* check if reuse has been requested */ 7652 if (reuse) { 7653 if (*mat_n) { 7654 PetscMPIInt subcommsize2; 7655 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7656 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7657 comm_n = PetscObjectComm((PetscObject)*mat_n); 7658 } else { 7659 comm_n = PETSC_COMM_SELF; 7660 } 7661 } else { /* MAT_INITIAL_MATRIX */ 7662 PetscMPIInt rank; 7663 7664 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7665 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7666 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7667 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7668 comm_n = PetscSubcommChild(subcomm); 7669 } 7670 /* flag to destroy *mat_n if not significative */ 7671 if (color) destroy_mat = PETSC_TRUE; 7672 } else { 7673 comm_n = comm; 7674 } 7675 7676 /* prepare send/receive buffers */ 7677 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7678 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7679 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7680 PetscCall(PetscArrayzero(ilengths_vals,size)); 7681 if (nis) { 7682 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7683 } 7684 7685 /* Get data from local matrices */ 7686 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7687 /* TODO: See below some guidelines on how to prepare the local buffers */ 7688 /* 7689 send_buffer_vals should contain the raw values of the local matrix 7690 send_buffer_idxs should contain: 7691 - MatType_PRIVATE type 7692 - PetscInt size_of_l2gmap 7693 - PetscInt global_row_indices[size_of_l2gmap] 7694 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7695 */ 7696 { 7697 ISLocalToGlobalMapping mapping; 7698 7699 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7700 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7701 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7702 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7703 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7704 send_buffer_idxs[1] = i; 7705 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7706 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7707 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7708 PetscCall(PetscMPIIntCast(i,&len)); 7709 for (i=0;i<n_sends;i++) { 7710 ilengths_vals[is_indices[i]] = len*len; 7711 ilengths_idxs[is_indices[i]] = len+2; 7712 } 7713 } 7714 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7715 /* additional is (if any) */ 7716 if (nis) { 7717 PetscMPIInt psum; 7718 PetscInt j; 7719 for (j=0,psum=0;j<nis;j++) { 7720 PetscInt plen; 7721 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7722 PetscCall(PetscMPIIntCast(plen,&len)); 7723 psum += len+1; /* indices + length */ 7724 } 7725 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7726 for (j=0,psum=0;j<nis;j++) { 7727 PetscInt plen; 7728 const PetscInt *is_array_idxs; 7729 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7730 send_buffer_idxs_is[psum] = plen; 7731 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7732 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7733 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7734 psum += plen+1; /* indices + length */ 7735 } 7736 for (i=0;i<n_sends;i++) { 7737 ilengths_idxs_is[is_indices[i]] = psum; 7738 } 7739 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7740 } 7741 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7742 7743 buf_size_idxs = 0; 7744 buf_size_vals = 0; 7745 buf_size_idxs_is = 0; 7746 buf_size_vecs = 0; 7747 for (i=0;i<n_recvs;i++) { 7748 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7749 buf_size_vals += (PetscInt)olengths_vals[i]; 7750 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7751 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7752 } 7753 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7754 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7755 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7756 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7757 7758 /* get new tags for clean communications */ 7759 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7760 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7761 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7762 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7763 7764 /* allocate for requests */ 7765 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7766 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7767 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7768 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7769 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7770 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7771 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7772 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7773 7774 /* communications */ 7775 ptr_idxs = recv_buffer_idxs; 7776 ptr_vals = recv_buffer_vals; 7777 ptr_idxs_is = recv_buffer_idxs_is; 7778 ptr_vecs = recv_buffer_vecs; 7779 for (i=0;i<n_recvs;i++) { 7780 source_dest = onodes[i]; 7781 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7782 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7783 ptr_idxs += olengths_idxs[i]; 7784 ptr_vals += olengths_vals[i]; 7785 if (nis) { 7786 source_dest = onodes_is[i]; 7787 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7788 ptr_idxs_is += olengths_idxs_is[i]; 7789 } 7790 if (nvecs) { 7791 source_dest = onodes[i]; 7792 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7793 ptr_vecs += olengths_idxs[i]-2; 7794 } 7795 } 7796 for (i=0;i<n_sends;i++) { 7797 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7798 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7799 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7800 if (nis) { 7801 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7802 } 7803 if (nvecs) { 7804 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7805 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7806 } 7807 } 7808 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7809 PetscCall(ISDestroy(&is_sends_internal)); 7810 7811 /* assemble new l2g map */ 7812 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7813 ptr_idxs = recv_buffer_idxs; 7814 new_local_rows = 0; 7815 for (i=0;i<n_recvs;i++) { 7816 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7817 ptr_idxs += olengths_idxs[i]; 7818 } 7819 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7820 ptr_idxs = recv_buffer_idxs; 7821 new_local_rows = 0; 7822 for (i=0;i<n_recvs;i++) { 7823 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7824 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7825 ptr_idxs += olengths_idxs[i]; 7826 } 7827 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7828 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7829 PetscCall(PetscFree(l2gmap_indices)); 7830 7831 /* infer new local matrix type from received local matrices type */ 7832 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7833 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7834 if (n_recvs) { 7835 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7836 ptr_idxs = recv_buffer_idxs; 7837 for (i=0;i<n_recvs;i++) { 7838 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7839 new_local_type_private = MATAIJ_PRIVATE; 7840 break; 7841 } 7842 ptr_idxs += olengths_idxs[i]; 7843 } 7844 switch (new_local_type_private) { 7845 case MATDENSE_PRIVATE: 7846 new_local_type = MATSEQAIJ; 7847 bs = 1; 7848 break; 7849 case MATAIJ_PRIVATE: 7850 new_local_type = MATSEQAIJ; 7851 bs = 1; 7852 break; 7853 case MATBAIJ_PRIVATE: 7854 new_local_type = MATSEQBAIJ; 7855 break; 7856 case MATSBAIJ_PRIVATE: 7857 new_local_type = MATSEQSBAIJ; 7858 break; 7859 default: 7860 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7861 } 7862 } else { /* by default, new_local_type is seqaij */ 7863 new_local_type = MATSEQAIJ; 7864 bs = 1; 7865 } 7866 7867 /* create MATIS object if needed */ 7868 if (!reuse) { 7869 PetscCall(MatGetSize(mat,&rows,&cols)); 7870 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7871 } else { 7872 /* it also destroys the local matrices */ 7873 if (*mat_n) { 7874 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7875 } else { /* this is a fake object */ 7876 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7877 } 7878 } 7879 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7880 PetscCall(MatSetType(local_mat,new_local_type)); 7881 7882 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7883 7884 /* Global to local map of received indices */ 7885 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7886 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7887 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7888 7889 /* restore attributes -> type of incoming data and its size */ 7890 buf_size_idxs = 0; 7891 for (i=0;i<n_recvs;i++) { 7892 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7893 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7894 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7895 } 7896 PetscCall(PetscFree(recv_buffer_idxs)); 7897 7898 /* set preallocation */ 7899 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7900 if (!newisdense) { 7901 PetscInt *new_local_nnz=NULL; 7902 7903 ptr_idxs = recv_buffer_idxs_local; 7904 if (n_recvs) { 7905 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7906 } 7907 for (i=0;i<n_recvs;i++) { 7908 PetscInt j; 7909 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7910 for (j=0;j<*(ptr_idxs+1);j++) { 7911 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7912 } 7913 } else { 7914 /* TODO */ 7915 } 7916 ptr_idxs += olengths_idxs[i]; 7917 } 7918 if (new_local_nnz) { 7919 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7920 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7921 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7922 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7923 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7924 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7925 } else { 7926 PetscCall(MatSetUp(local_mat)); 7927 } 7928 PetscCall(PetscFree(new_local_nnz)); 7929 } else { 7930 PetscCall(MatSetUp(local_mat)); 7931 } 7932 7933 /* set values */ 7934 ptr_vals = recv_buffer_vals; 7935 ptr_idxs = recv_buffer_idxs_local; 7936 for (i=0;i<n_recvs;i++) { 7937 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7938 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7939 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7940 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7941 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7942 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7943 } else { 7944 /* TODO */ 7945 } 7946 ptr_idxs += olengths_idxs[i]; 7947 ptr_vals += olengths_vals[i]; 7948 } 7949 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7950 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7951 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7952 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7953 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7954 PetscCall(PetscFree(recv_buffer_vals)); 7955 7956 #if 0 7957 if (!restrict_comm) { /* check */ 7958 Vec lvec,rvec; 7959 PetscReal infty_error; 7960 7961 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7962 PetscCall(VecSetRandom(rvec,NULL)); 7963 PetscCall(MatMult(mat,rvec,lvec)); 7964 PetscCall(VecScale(lvec,-1.0)); 7965 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7966 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7967 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7968 PetscCall(VecDestroy(&rvec)); 7969 PetscCall(VecDestroy(&lvec)); 7970 } 7971 #endif 7972 7973 /* assemble new additional is (if any) */ 7974 if (nis) { 7975 PetscInt **temp_idxs,*count_is,j,psum; 7976 7977 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7978 PetscCall(PetscCalloc1(nis,&count_is)); 7979 ptr_idxs = recv_buffer_idxs_is; 7980 psum = 0; 7981 for (i=0;i<n_recvs;i++) { 7982 for (j=0;j<nis;j++) { 7983 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7984 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7985 psum += plen; 7986 ptr_idxs += plen+1; /* shift pointer to received data */ 7987 } 7988 } 7989 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7990 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7991 for (i=1;i<nis;i++) { 7992 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7993 } 7994 PetscCall(PetscArrayzero(count_is,nis)); 7995 ptr_idxs = recv_buffer_idxs_is; 7996 for (i=0;i<n_recvs;i++) { 7997 for (j=0;j<nis;j++) { 7998 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7999 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 8000 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8001 ptr_idxs += plen+1; /* shift pointer to received data */ 8002 } 8003 } 8004 for (i=0;i<nis;i++) { 8005 PetscCall(ISDestroy(&isarray[i])); 8006 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 8007 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 8008 } 8009 PetscCall(PetscFree(count_is)); 8010 PetscCall(PetscFree(temp_idxs[0])); 8011 PetscCall(PetscFree(temp_idxs)); 8012 } 8013 /* free workspace */ 8014 PetscCall(PetscFree(recv_buffer_idxs_is)); 8015 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 8016 PetscCall(PetscFree(send_buffer_idxs)); 8017 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 8018 if (isdense) { 8019 PetscCall(MatISGetLocalMat(mat,&local_mat)); 8020 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 8021 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 8022 } else { 8023 /* PetscCall(PetscFree(send_buffer_vals)); */ 8024 } 8025 if (nis) { 8026 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 8027 PetscCall(PetscFree(send_buffer_idxs_is)); 8028 } 8029 8030 if (nvecs) { 8031 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 8032 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 8033 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8034 PetscCall(VecDestroy(&nnsp_vec[0])); 8035 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 8036 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 8037 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 8038 /* set values */ 8039 ptr_vals = recv_buffer_vecs; 8040 ptr_idxs = recv_buffer_idxs_local; 8041 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8042 for (i=0;i<n_recvs;i++) { 8043 PetscInt j; 8044 for (j=0;j<*(ptr_idxs+1);j++) { 8045 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8046 } 8047 ptr_idxs += olengths_idxs[i]; 8048 ptr_vals += olengths_idxs[i]-2; 8049 } 8050 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8051 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8052 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8053 } 8054 8055 PetscCall(PetscFree(recv_buffer_vecs)); 8056 PetscCall(PetscFree(recv_buffer_idxs_local)); 8057 PetscCall(PetscFree(recv_req_idxs)); 8058 PetscCall(PetscFree(recv_req_vals)); 8059 PetscCall(PetscFree(recv_req_vecs)); 8060 PetscCall(PetscFree(recv_req_idxs_is)); 8061 PetscCall(PetscFree(send_req_idxs)); 8062 PetscCall(PetscFree(send_req_vals)); 8063 PetscCall(PetscFree(send_req_vecs)); 8064 PetscCall(PetscFree(send_req_idxs_is)); 8065 PetscCall(PetscFree(ilengths_vals)); 8066 PetscCall(PetscFree(ilengths_idxs)); 8067 PetscCall(PetscFree(olengths_vals)); 8068 PetscCall(PetscFree(olengths_idxs)); 8069 PetscCall(PetscFree(onodes)); 8070 if (nis) { 8071 PetscCall(PetscFree(ilengths_idxs_is)); 8072 PetscCall(PetscFree(olengths_idxs_is)); 8073 PetscCall(PetscFree(onodes_is)); 8074 } 8075 PetscCall(PetscSubcommDestroy(&subcomm)); 8076 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8077 PetscCall(MatDestroy(mat_n)); 8078 for (i=0;i<nis;i++) { 8079 PetscCall(ISDestroy(&isarray[i])); 8080 } 8081 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8082 PetscCall(VecDestroy(&nnsp_vec[0])); 8083 } 8084 *mat_n = NULL; 8085 } 8086 PetscFunctionReturn(0); 8087 } 8088 8089 /* temporary hack into ksp private data structure */ 8090 #include <petsc/private/kspimpl.h> 8091 8092 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8093 { 8094 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8095 PC_IS *pcis = (PC_IS*)pc->data; 8096 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8097 Mat coarsedivudotp = NULL; 8098 Mat coarseG,t_coarse_mat_is; 8099 MatNullSpace CoarseNullSpace = NULL; 8100 ISLocalToGlobalMapping coarse_islg; 8101 IS coarse_is,*isarray,corners; 8102 PetscInt i,im_active=-1,active_procs=-1; 8103 PetscInt nis,nisdofs,nisneu,nisvert; 8104 PetscInt coarse_eqs_per_proc; 8105 PC pc_temp; 8106 PCType coarse_pc_type; 8107 KSPType coarse_ksp_type; 8108 PetscBool multilevel_requested,multilevel_allowed; 8109 PetscBool coarse_reuse; 8110 PetscInt ncoarse,nedcfield; 8111 PetscBool compute_vecs = PETSC_FALSE; 8112 PetscScalar *array; 8113 MatReuse coarse_mat_reuse; 8114 PetscBool restr, full_restr, have_void; 8115 PetscMPIInt size; 8116 8117 PetscFunctionBegin; 8118 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8119 /* Assign global numbering to coarse dofs */ 8120 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8121 PetscInt ocoarse_size; 8122 compute_vecs = PETSC_TRUE; 8123 8124 pcbddc->new_primal_space = PETSC_TRUE; 8125 ocoarse_size = pcbddc->coarse_size; 8126 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8127 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8128 /* see if we can avoid some work */ 8129 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8130 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8131 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8132 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8133 coarse_reuse = PETSC_FALSE; 8134 } else { /* we can safely reuse already computed coarse matrix */ 8135 coarse_reuse = PETSC_TRUE; 8136 } 8137 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8138 coarse_reuse = PETSC_FALSE; 8139 } 8140 /* reset any subassembling information */ 8141 if (!coarse_reuse || pcbddc->recompute_topography) { 8142 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8143 } 8144 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8145 coarse_reuse = PETSC_TRUE; 8146 } 8147 if (coarse_reuse && pcbddc->coarse_ksp) { 8148 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8149 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8150 coarse_mat_reuse = MAT_REUSE_MATRIX; 8151 } else { 8152 coarse_mat = NULL; 8153 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8154 } 8155 8156 /* creates temporary l2gmap and IS for coarse indexes */ 8157 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8158 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8159 8160 /* creates temporary MATIS object for coarse matrix */ 8161 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8162 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8163 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8164 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8165 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8166 PetscCall(MatDestroy(&coarse_submat_dense)); 8167 8168 /* count "active" (i.e. with positive local size) and "void" processes */ 8169 im_active = !!(pcis->n); 8170 PetscCall(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8171 8172 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8173 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8174 /* full_restr : just use the receivers from the subassembling pattern */ 8175 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8176 coarse_mat_is = NULL; 8177 multilevel_allowed = PETSC_FALSE; 8178 multilevel_requested = PETSC_FALSE; 8179 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8180 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8181 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8182 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8183 if (multilevel_requested) { 8184 ncoarse = active_procs/pcbddc->coarsening_ratio; 8185 restr = PETSC_FALSE; 8186 full_restr = PETSC_FALSE; 8187 } else { 8188 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8189 restr = PETSC_TRUE; 8190 full_restr = PETSC_TRUE; 8191 } 8192 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8193 ncoarse = PetscMax(1,ncoarse); 8194 if (!pcbddc->coarse_subassembling) { 8195 if (pcbddc->coarsening_ratio > 1) { 8196 if (multilevel_requested) { 8197 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8198 } else { 8199 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8200 } 8201 } else { 8202 PetscMPIInt rank; 8203 8204 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8205 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8206 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8207 } 8208 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8209 PetscInt psum; 8210 if (pcbddc->coarse_ksp) psum = 1; 8211 else psum = 0; 8212 PetscCall(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8213 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8214 } 8215 /* determine if we can go multilevel */ 8216 if (multilevel_requested) { 8217 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8218 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8219 } 8220 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8221 8222 /* dump subassembling pattern */ 8223 if (pcbddc->dbg_flag && multilevel_allowed) { 8224 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8225 } 8226 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8227 nedcfield = -1; 8228 corners = NULL; 8229 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8230 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8231 const PetscInt *idxs; 8232 ISLocalToGlobalMapping tmap; 8233 8234 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8235 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8236 /* allocate space for temporary storage */ 8237 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8238 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8239 /* allocate for IS array */ 8240 nisdofs = pcbddc->n_ISForDofsLocal; 8241 if (pcbddc->nedclocal) { 8242 if (pcbddc->nedfield > -1) { 8243 nedcfield = pcbddc->nedfield; 8244 } else { 8245 nedcfield = 0; 8246 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%" PetscInt_FMT ")",nisdofs); 8247 nisdofs = 1; 8248 } 8249 } 8250 nisneu = !!pcbddc->NeumannBoundariesLocal; 8251 nisvert = 0; /* nisvert is not used */ 8252 nis = nisdofs + nisneu + nisvert; 8253 PetscCall(PetscMalloc1(nis,&isarray)); 8254 /* dofs splitting */ 8255 for (i=0;i<nisdofs;i++) { 8256 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8257 if (nedcfield != i) { 8258 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8259 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8260 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8261 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8262 } else { 8263 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8264 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8265 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8266 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8267 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8268 } 8269 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8270 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8271 /* PetscCall(ISView(isarray[i],0)); */ 8272 } 8273 /* neumann boundaries */ 8274 if (pcbddc->NeumannBoundariesLocal) { 8275 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8276 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8277 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8278 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8279 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8280 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8281 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8282 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8283 } 8284 /* coordinates */ 8285 if (pcbddc->corner_selected) { 8286 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8287 PetscCall(ISGetLocalSize(corners,&tsize)); 8288 PetscCall(ISGetIndices(corners,&idxs)); 8289 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8290 PetscCheck(tsize == nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT,tsize,nout); 8291 PetscCall(ISRestoreIndices(corners,&idxs)); 8292 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8293 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8294 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8295 } 8296 PetscCall(PetscFree(tidxs)); 8297 PetscCall(PetscFree(tidxs2)); 8298 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8299 } else { 8300 nis = 0; 8301 nisdofs = 0; 8302 nisneu = 0; 8303 nisvert = 0; 8304 isarray = NULL; 8305 } 8306 /* destroy no longer needed map */ 8307 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8308 8309 /* subassemble */ 8310 if (multilevel_allowed) { 8311 Vec vp[1]; 8312 PetscInt nvecs = 0; 8313 PetscBool reuse,reuser; 8314 8315 if (coarse_mat) reuse = PETSC_TRUE; 8316 else reuse = PETSC_FALSE; 8317 PetscCall(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8318 vp[0] = NULL; 8319 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8320 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8321 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8322 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8323 nvecs = 1; 8324 8325 if (pcbddc->divudotp) { 8326 Mat B,loc_divudotp; 8327 Vec v,p; 8328 IS dummy; 8329 PetscInt np; 8330 8331 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8332 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8333 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8334 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8335 PetscCall(MatCreateVecs(B,&v,&p)); 8336 PetscCall(VecSet(p,1.)); 8337 PetscCall(MatMultTranspose(B,p,v)); 8338 PetscCall(VecDestroy(&p)); 8339 PetscCall(MatDestroy(&B)); 8340 PetscCall(VecGetArray(vp[0],&array)); 8341 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8342 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8343 PetscCall(VecResetArray(pcbddc->vec1_P)); 8344 PetscCall(VecRestoreArray(vp[0],&array)); 8345 PetscCall(ISDestroy(&dummy)); 8346 PetscCall(VecDestroy(&v)); 8347 } 8348 } 8349 if (reuser) { 8350 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8351 } else { 8352 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8353 } 8354 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8355 PetscScalar *arraym; 8356 const PetscScalar *arrayv; 8357 PetscInt nl; 8358 PetscCall(VecGetLocalSize(vp[0],&nl)); 8359 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8360 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8361 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8362 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8363 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8364 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8365 PetscCall(VecDestroy(&vp[0])); 8366 } else { 8367 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8368 } 8369 } else { 8370 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8371 } 8372 if (coarse_mat_is || coarse_mat) { 8373 if (!multilevel_allowed) { 8374 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8375 } else { 8376 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8377 if (coarse_mat_is) { 8378 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8379 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8380 coarse_mat = coarse_mat_is; 8381 } 8382 } 8383 } 8384 PetscCall(MatDestroy(&t_coarse_mat_is)); 8385 PetscCall(MatDestroy(&coarse_mat_is)); 8386 8387 /* create local to global scatters for coarse problem */ 8388 if (compute_vecs) { 8389 PetscInt lrows; 8390 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8391 if (coarse_mat) { 8392 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8393 } else { 8394 lrows = 0; 8395 } 8396 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8397 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8398 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8399 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8400 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8401 } 8402 PetscCall(ISDestroy(&coarse_is)); 8403 8404 /* set defaults for coarse KSP and PC */ 8405 if (multilevel_allowed) { 8406 coarse_ksp_type = KSPRICHARDSON; 8407 coarse_pc_type = PCBDDC; 8408 } else { 8409 coarse_ksp_type = KSPPREONLY; 8410 coarse_pc_type = PCREDUNDANT; 8411 } 8412 8413 /* print some info if requested */ 8414 if (pcbddc->dbg_flag) { 8415 if (!multilevel_allowed) { 8416 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8417 if (multilevel_requested) { 8418 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8419 } else if (pcbddc->max_levels) { 8420 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%" PetscInt_FMT ")\n",pcbddc->max_levels)); 8421 } 8422 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8423 } 8424 } 8425 8426 /* communicate coarse discrete gradient */ 8427 coarseG = NULL; 8428 if (pcbddc->nedcG && multilevel_allowed) { 8429 MPI_Comm ccomm; 8430 if (coarse_mat) { 8431 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8432 } else { 8433 ccomm = MPI_COMM_NULL; 8434 } 8435 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8436 } 8437 8438 /* create the coarse KSP object only once with defaults */ 8439 if (coarse_mat) { 8440 PetscBool isredundant,isbddc,force,valid; 8441 PetscViewer dbg_viewer = NULL; 8442 8443 if (pcbddc->dbg_flag) { 8444 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8445 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8446 } 8447 if (!pcbddc->coarse_ksp) { 8448 char prefix[256],str_level[16]; 8449 size_t len; 8450 8451 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8452 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8453 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8454 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8455 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8456 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8457 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8458 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8459 /* TODO is this logic correct? should check for coarse_mat type */ 8460 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8461 /* prefix */ 8462 PetscCall(PetscStrcpy(prefix,"")); 8463 PetscCall(PetscStrcpy(str_level,"")); 8464 if (!pcbddc->current_level) { 8465 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8466 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8467 } else { 8468 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8469 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8470 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8471 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8472 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8473 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8474 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8475 } 8476 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8477 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8478 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8479 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8480 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8481 /* allow user customization */ 8482 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8483 /* get some info after set from options */ 8484 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8485 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8486 force = PETSC_FALSE; 8487 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8488 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8489 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8490 if (multilevel_allowed && !force && !valid) { 8491 isbddc = PETSC_TRUE; 8492 PetscCall(PCSetType(pc_temp,PCBDDC)); 8493 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8494 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8495 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8496 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8497 PetscObjectOptionsBegin((PetscObject)pc_temp); 8498 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8499 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8500 PetscOptionsEnd(); 8501 pc_temp->setfromoptionscalled++; 8502 } 8503 } 8504 } 8505 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8506 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8507 if (nisdofs) { 8508 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8509 for (i=0;i<nisdofs;i++) { 8510 PetscCall(ISDestroy(&isarray[i])); 8511 } 8512 } 8513 if (nisneu) { 8514 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8515 PetscCall(ISDestroy(&isarray[nisdofs])); 8516 } 8517 if (nisvert) { 8518 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8519 PetscCall(ISDestroy(&isarray[nis-1])); 8520 } 8521 if (coarseG) { 8522 PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8523 } 8524 8525 /* get some info after set from options */ 8526 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8527 8528 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8529 if (isbddc && !multilevel_allowed) { 8530 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8531 } 8532 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8533 force = PETSC_FALSE; 8534 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8535 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8536 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8537 PetscCall(PCSetType(pc_temp,PCBDDC)); 8538 } 8539 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8540 if (isredundant) { 8541 KSP inner_ksp; 8542 PC inner_pc; 8543 8544 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8545 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8546 } 8547 8548 /* parameters which miss an API */ 8549 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8550 if (isbddc) { 8551 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8552 8553 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8554 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8555 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8556 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8557 if (pcbddc_coarse->benign_saddle_point) { 8558 Mat coarsedivudotp_is; 8559 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8560 IS row,col; 8561 const PetscInt *gidxs; 8562 PetscInt n,st,M,N; 8563 8564 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8565 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8566 st = st-n; 8567 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8568 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8569 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8570 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8571 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8572 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8573 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8574 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8575 PetscCall(ISGetSize(row,&M)); 8576 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8577 PetscCall(ISDestroy(&row)); 8578 PetscCall(ISDestroy(&col)); 8579 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8580 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8581 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8582 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8583 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8584 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8585 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8586 PetscCall(MatDestroy(&coarsedivudotp)); 8587 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8588 PetscCall(MatDestroy(&coarsedivudotp_is)); 8589 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8590 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8591 } 8592 } 8593 8594 /* propagate symmetry info of coarse matrix */ 8595 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8596 if (pc->pmat->symmetric_set) { 8597 PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8598 } 8599 if (pc->pmat->hermitian_set) { 8600 PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8601 } 8602 if (pc->pmat->spd_set) { 8603 PetscCall(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8604 } 8605 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8606 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8607 } 8608 /* set operators */ 8609 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8610 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8611 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8612 if (pcbddc->dbg_flag) { 8613 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8614 } 8615 } 8616 PetscCall(MatDestroy(&coarseG)); 8617 PetscCall(PetscFree(isarray)); 8618 #if 0 8619 { 8620 PetscViewer viewer; 8621 char filename[256]; 8622 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8623 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8624 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8625 PetscCall(MatView(coarse_mat,viewer)); 8626 PetscCall(PetscViewerPopFormat(viewer)); 8627 PetscCall(PetscViewerDestroy(&viewer)); 8628 } 8629 #endif 8630 8631 if (corners) { 8632 Vec gv; 8633 IS is; 8634 const PetscInt *idxs; 8635 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8636 PetscScalar *coords; 8637 8638 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8639 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8640 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8641 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8642 PetscCall(VecSetBlockSize(gv,cdim)); 8643 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8644 PetscCall(VecSetType(gv,VECSTANDARD)); 8645 PetscCall(VecSetFromOptions(gv)); 8646 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8647 8648 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8649 PetscCall(ISGetLocalSize(is,&n)); 8650 PetscCall(ISGetIndices(is,&idxs)); 8651 PetscCall(PetscMalloc1(n*cdim,&coords)); 8652 for (i=0;i<n;i++) { 8653 for (d=0;d<cdim;d++) { 8654 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8655 } 8656 } 8657 PetscCall(ISRestoreIndices(is,&idxs)); 8658 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8659 8660 PetscCall(ISGetLocalSize(corners,&n)); 8661 PetscCall(ISGetIndices(corners,&idxs)); 8662 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8663 PetscCall(ISRestoreIndices(corners,&idxs)); 8664 PetscCall(PetscFree(coords)); 8665 PetscCall(VecAssemblyBegin(gv)); 8666 PetscCall(VecAssemblyEnd(gv)); 8667 PetscCall(VecGetArray(gv,&coords)); 8668 if (pcbddc->coarse_ksp) { 8669 PC coarse_pc; 8670 PetscBool isbddc; 8671 8672 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8673 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8674 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8675 PetscReal *realcoords; 8676 8677 PetscCall(VecGetLocalSize(gv,&n)); 8678 #if defined(PETSC_USE_COMPLEX) 8679 PetscCall(PetscMalloc1(n,&realcoords)); 8680 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8681 #else 8682 realcoords = coords; 8683 #endif 8684 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8685 #if defined(PETSC_USE_COMPLEX) 8686 PetscCall(PetscFree(realcoords)); 8687 #endif 8688 } 8689 } 8690 PetscCall(VecRestoreArray(gv,&coords)); 8691 PetscCall(VecDestroy(&gv)); 8692 } 8693 PetscCall(ISDestroy(&corners)); 8694 8695 if (pcbddc->coarse_ksp) { 8696 Vec crhs,csol; 8697 8698 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8699 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8700 if (!csol) { 8701 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8702 } 8703 if (!crhs) { 8704 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8705 } 8706 } 8707 PetscCall(MatDestroy(&coarsedivudotp)); 8708 8709 /* compute null space for coarse solver if the benign trick has been requested */ 8710 if (pcbddc->benign_null) { 8711 8712 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8713 for (i=0;i<pcbddc->benign_n;i++) { 8714 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8715 } 8716 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8717 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8718 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8719 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8720 if (coarse_mat) { 8721 Vec nullv; 8722 PetscScalar *array,*array2; 8723 PetscInt nl; 8724 8725 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8726 PetscCall(VecGetLocalSize(nullv,&nl)); 8727 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8728 PetscCall(VecGetArray(nullv,&array2)); 8729 PetscCall(PetscArraycpy(array2,array,nl)); 8730 PetscCall(VecRestoreArray(nullv,&array2)); 8731 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8732 PetscCall(VecNormalize(nullv,NULL)); 8733 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8734 PetscCall(VecDestroy(&nullv)); 8735 } 8736 } 8737 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8738 8739 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8740 if (pcbddc->coarse_ksp) { 8741 PetscBool ispreonly; 8742 8743 if (CoarseNullSpace) { 8744 PetscBool isnull; 8745 8746 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8747 if (isnull) { 8748 PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8749 } 8750 /* TODO: add local nullspaces (if any) */ 8751 } 8752 /* setup coarse ksp */ 8753 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8754 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8755 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8756 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8757 KSP check_ksp; 8758 KSPType check_ksp_type; 8759 PC check_pc; 8760 Vec check_vec,coarse_vec; 8761 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8762 PetscInt its; 8763 PetscBool compute_eigs; 8764 PetscReal *eigs_r,*eigs_c; 8765 PetscInt neigs; 8766 const char *prefix; 8767 8768 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8769 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8770 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8771 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8772 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8773 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8774 /* prevent from setup unneeded object */ 8775 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8776 PetscCall(PCSetType(check_pc,PCNONE)); 8777 if (ispreonly) { 8778 check_ksp_type = KSPPREONLY; 8779 compute_eigs = PETSC_FALSE; 8780 } else { 8781 check_ksp_type = KSPGMRES; 8782 compute_eigs = PETSC_TRUE; 8783 } 8784 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8785 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8786 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8787 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8788 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8789 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8790 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8791 PetscCall(KSPSetFromOptions(check_ksp)); 8792 PetscCall(KSPSetUp(check_ksp)); 8793 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8794 PetscCall(KSPSetPC(check_ksp,check_pc)); 8795 /* create random vec */ 8796 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8797 PetscCall(VecSetRandom(check_vec,NULL)); 8798 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8799 /* solve coarse problem */ 8800 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8801 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8802 /* set eigenvalue estimation if preonly has not been requested */ 8803 if (compute_eigs) { 8804 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8805 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8806 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8807 if (neigs) { 8808 lambda_max = eigs_r[neigs-1]; 8809 lambda_min = eigs_r[0]; 8810 if (pcbddc->use_coarse_estimates) { 8811 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8812 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8813 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8814 } 8815 } 8816 } 8817 } 8818 8819 /* check coarse problem residual error */ 8820 if (pcbddc->dbg_flag) { 8821 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8822 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8823 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8824 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8825 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8826 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8827 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8828 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8829 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8830 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",(double)infty_error)); 8831 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",(double)abs_infty_error)); 8832 if (CoarseNullSpace) { 8833 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8834 } 8835 if (compute_eigs) { 8836 PetscReal lambda_max_s,lambda_min_s; 8837 KSPConvergedReason reason; 8838 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8839 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8840 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8841 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8842 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,(double)lambda_min,(double)lambda_max,(double)lambda_min_s,(double)lambda_max_s)); 8843 for (i=0;i<neigs;i++) { 8844 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",(double)eigs_r[i],(double)eigs_c[i])); 8845 } 8846 } 8847 PetscCall(PetscViewerFlush(dbg_viewer)); 8848 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8849 } 8850 PetscCall(VecDestroy(&check_vec)); 8851 PetscCall(VecDestroy(&coarse_vec)); 8852 PetscCall(KSPDestroy(&check_ksp)); 8853 if (compute_eigs) { 8854 PetscCall(PetscFree(eigs_r)); 8855 PetscCall(PetscFree(eigs_c)); 8856 } 8857 } 8858 } 8859 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8860 /* print additional info */ 8861 if (pcbddc->dbg_flag) { 8862 /* waits until all processes reaches this point */ 8863 PetscCall(PetscBarrier((PetscObject)pc)); 8864 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %" PetscInt_FMT "\n",pcbddc->current_level)); 8865 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8866 } 8867 8868 /* free memory */ 8869 PetscCall(MatDestroy(&coarse_mat)); 8870 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8871 PetscFunctionReturn(0); 8872 } 8873 8874 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8875 { 8876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8877 PC_IS* pcis = (PC_IS*)pc->data; 8878 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8879 IS subset,subset_mult,subset_n; 8880 PetscInt local_size,coarse_size=0; 8881 PetscInt *local_primal_indices=NULL; 8882 const PetscInt *t_local_primal_indices; 8883 8884 PetscFunctionBegin; 8885 /* Compute global number of coarse dofs */ 8886 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8887 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8888 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8889 PetscCall(ISDestroy(&subset_n)); 8890 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8891 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8892 PetscCall(ISDestroy(&subset)); 8893 PetscCall(ISDestroy(&subset_mult)); 8894 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8895 PetscCheck(local_size == pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT,local_size,pcbddc->local_primal_size); 8896 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8897 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8898 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8899 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8900 PetscCall(ISDestroy(&subset_n)); 8901 8902 /* check numbering */ 8903 if (pcbddc->dbg_flag) { 8904 PetscScalar coarsesum,*array,*array2; 8905 PetscInt i; 8906 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8907 8908 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8909 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8910 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8911 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8912 /* counter */ 8913 PetscCall(VecSet(pcis->vec1_global,0.0)); 8914 PetscCall(VecSet(pcis->vec1_N,1.0)); 8915 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8916 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8917 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8918 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8919 PetscCall(VecSet(pcis->vec1_N,0.0)); 8920 for (i=0;i<pcbddc->local_primal_size;i++) { 8921 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8922 } 8923 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8924 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8925 PetscCall(VecSet(pcis->vec1_global,0.0)); 8926 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8927 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8928 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8929 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8930 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8931 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8932 for (i=0;i<pcis->n;i++) { 8933 if (array[i] != 0.0 && array[i] != array2[i]) { 8934 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8935 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8936 set_error = PETSC_TRUE; 8937 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8938 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n",PetscGlobalRank,i,gi,owned,neigh)); 8939 } 8940 } 8941 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8942 PetscCall(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8943 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8944 for (i=0;i<pcis->n;i++) { 8945 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8946 } 8947 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8948 PetscCall(VecSet(pcis->vec1_global,0.0)); 8949 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8950 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8951 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8952 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %" PetscInt_FMT " (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum))); 8953 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8954 PetscInt *gidxs; 8955 8956 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8957 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8958 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8959 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8960 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8961 for (i=0;i<pcbddc->local_primal_size;i++) { 8962 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8963 } 8964 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8965 PetscCall(PetscFree(gidxs)); 8966 } 8967 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8968 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8969 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8970 } 8971 8972 /* get back data */ 8973 *coarse_size_n = coarse_size; 8974 *local_primal_indices_n = local_primal_indices; 8975 PetscFunctionReturn(0); 8976 } 8977 8978 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8979 { 8980 IS localis_t; 8981 PetscInt i,lsize,*idxs,n; 8982 PetscScalar *vals; 8983 8984 PetscFunctionBegin; 8985 /* get indices in local ordering exploiting local to global map */ 8986 PetscCall(ISGetLocalSize(globalis,&lsize)); 8987 PetscCall(PetscMalloc1(lsize,&vals)); 8988 for (i=0;i<lsize;i++) vals[i] = 1.0; 8989 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8990 PetscCall(VecSet(gwork,0.0)); 8991 PetscCall(VecSet(lwork,0.0)); 8992 if (idxs) { /* multilevel guard */ 8993 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8994 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8995 } 8996 PetscCall(VecAssemblyBegin(gwork)); 8997 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8998 PetscCall(PetscFree(vals)); 8999 PetscCall(VecAssemblyEnd(gwork)); 9000 /* now compute set in local ordering */ 9001 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9002 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 9003 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 9004 PetscCall(VecGetSize(lwork,&n)); 9005 for (i=0,lsize=0;i<n;i++) { 9006 if (PetscRealPart(vals[i]) > 0.5) { 9007 lsize++; 9008 } 9009 } 9010 PetscCall(PetscMalloc1(lsize,&idxs)); 9011 for (i=0,lsize=0;i<n;i++) { 9012 if (PetscRealPart(vals[i]) > 0.5) { 9013 idxs[lsize++] = i; 9014 } 9015 } 9016 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 9017 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 9018 *localis = localis_t; 9019 PetscFunctionReturn(0); 9020 } 9021 9022 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9023 { 9024 PC_IS *pcis = (PC_IS*)pc->data; 9025 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 9026 PC_IS *pcisf; 9027 PC_BDDC *pcbddcf; 9028 PC pcf; 9029 9030 PetscFunctionBegin; 9031 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 9032 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pcf)); 9033 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 9034 PetscCall(PCSetType(pcf,PCBDDC)); 9035 9036 pcisf = (PC_IS*)pcf->data; 9037 pcbddcf = (PC_BDDC*)pcf->data; 9038 9039 pcisf->is_B_local = pcis->is_B_local; 9040 pcisf->vec1_N = pcis->vec1_N; 9041 pcisf->BtoNmap = pcis->BtoNmap; 9042 pcisf->n = pcis->n; 9043 pcisf->n_B = pcis->n_B; 9044 9045 PetscCall(PetscFree(pcbddcf->mat_graph)); 9046 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9047 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9048 pcbddcf->sub_schurs = schurs; 9049 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9050 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9051 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9052 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9053 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9054 pcbddcf->use_faces = PETSC_TRUE; 9055 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9056 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9057 pcbddcf->use_qr_single = (PetscBool)!constraints; 9058 pcbddcf->fake_change = PETSC_TRUE; 9059 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9060 9061 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9062 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9063 9064 *change = pcbddcf->ConstraintMatrix; 9065 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,change_primal)); 9066 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat),pcbddcf->local_primal_size_cc,pcbddcf->local_primal_ref_mult,PETSC_COPY_VALUES,change_primal_mult)); 9067 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9068 9069 if (schurs) pcbddcf->sub_schurs = NULL; 9070 pcbddcf->ConstraintMatrix = NULL; 9071 pcbddcf->mat_graph = NULL; 9072 pcisf->is_B_local = NULL; 9073 pcisf->vec1_N = NULL; 9074 pcisf->BtoNmap = NULL; 9075 PetscCall(PCDestroy(&pcf)); 9076 PetscFunctionReturn(0); 9077 } 9078 9079 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9080 { 9081 PC_IS *pcis=(PC_IS*)pc->data; 9082 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9083 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9084 Mat S_j; 9085 PetscInt *used_xadj,*used_adjncy; 9086 PetscBool free_used_adj; 9087 9088 PetscFunctionBegin; 9089 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9090 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9091 free_used_adj = PETSC_FALSE; 9092 if (pcbddc->sub_schurs_layers == -1) { 9093 used_xadj = NULL; 9094 used_adjncy = NULL; 9095 } else { 9096 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9097 used_xadj = pcbddc->mat_graph->xadj; 9098 used_adjncy = pcbddc->mat_graph->adjncy; 9099 } else if (pcbddc->computed_rowadj) { 9100 used_xadj = pcbddc->mat_graph->xadj; 9101 used_adjncy = pcbddc->mat_graph->adjncy; 9102 } else { 9103 PetscBool flg_row=PETSC_FALSE; 9104 const PetscInt *xadj,*adjncy; 9105 PetscInt nvtxs; 9106 9107 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9108 if (flg_row) { 9109 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9110 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9111 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9112 free_used_adj = PETSC_TRUE; 9113 } else { 9114 pcbddc->sub_schurs_layers = -1; 9115 used_xadj = NULL; 9116 used_adjncy = NULL; 9117 } 9118 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9119 } 9120 } 9121 9122 /* setup sub_schurs data */ 9123 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9124 if (!sub_schurs->schur_explicit) { 9125 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9126 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9127 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9128 } else { 9129 Mat change = NULL; 9130 Vec scaling = NULL; 9131 IS change_primal = NULL, iP; 9132 PetscInt benign_n; 9133 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9134 PetscBool need_change = PETSC_FALSE; 9135 PetscBool discrete_harmonic = PETSC_FALSE; 9136 9137 if (!pcbddc->use_vertices && reuse_solvers) { 9138 PetscInt n_vertices; 9139 9140 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9141 reuse_solvers = (PetscBool)!n_vertices; 9142 } 9143 if (!pcbddc->benign_change_explicit) { 9144 benign_n = pcbddc->benign_n; 9145 } else { 9146 benign_n = 0; 9147 } 9148 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9149 We need a global reduction to avoid possible deadlocks. 9150 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9151 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9152 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9153 PetscCall(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9154 need_change = (PetscBool)(!need_change); 9155 } 9156 /* If the user defines additional constraints, we import them here */ 9157 if (need_change) { 9158 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9159 PetscCall(PCBDDCComputeFakeChange(pc,PETSC_FALSE,NULL,NULL,&change,&change_primal,NULL,&sub_schurs->change_with_qr)); 9160 9161 } 9162 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9163 9164 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9165 if (iP) { 9166 PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 9167 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9168 PetscOptionsEnd(); 9169 } 9170 if (discrete_harmonic) { 9171 Mat A; 9172 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9173 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9174 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9175 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9176 PetscCall(MatDestroy(&A)); 9177 } else { 9178 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9179 } 9180 PetscCall(MatDestroy(&change)); 9181 PetscCall(ISDestroy(&change_primal)); 9182 } 9183 PetscCall(MatDestroy(&S_j)); 9184 9185 /* free adjacency */ 9186 if (free_used_adj) { 9187 PetscCall(PetscFree2(used_xadj,used_adjncy)); 9188 } 9189 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9190 PetscFunctionReturn(0); 9191 } 9192 9193 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9194 { 9195 PC_IS *pcis=(PC_IS*)pc->data; 9196 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9197 PCBDDCGraph graph; 9198 9199 PetscFunctionBegin; 9200 /* attach interface graph for determining subsets */ 9201 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9202 IS verticesIS,verticescomm; 9203 PetscInt vsize,*idxs; 9204 9205 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9206 PetscCall(ISGetSize(verticesIS,&vsize)); 9207 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9208 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9209 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9210 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9211 PetscCall(PCBDDCGraphCreate(&graph)); 9212 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9213 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9214 PetscCall(ISDestroy(&verticescomm)); 9215 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9216 } else { 9217 graph = pcbddc->mat_graph; 9218 } 9219 /* print some info */ 9220 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9221 IS vertices; 9222 PetscInt nv,nedges,nfaces; 9223 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9224 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9225 PetscCall(ISGetSize(vertices,&nv)); 9226 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9227 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9228 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 9229 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges)); 9230 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces)); 9231 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9232 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9233 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9234 } 9235 9236 /* sub_schurs init */ 9237 if (!pcbddc->sub_schurs) { 9238 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9239 } 9240 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild,PETSC_FALSE)); 9241 9242 /* free graph struct */ 9243 if (pcbddc->sub_schurs_rebuild) { 9244 PetscCall(PCBDDCGraphDestroy(&graph)); 9245 } 9246 PetscFunctionReturn(0); 9247 } 9248 9249 PetscErrorCode PCBDDCCheckOperator(PC pc) 9250 { 9251 PC_IS *pcis=(PC_IS*)pc->data; 9252 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9253 9254 PetscFunctionBegin; 9255 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9256 IS zerodiag = NULL; 9257 Mat S_j,B0_B=NULL; 9258 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9259 PetscScalar *p0_check,*array,*array2; 9260 PetscReal norm; 9261 PetscInt i; 9262 9263 /* B0 and B0_B */ 9264 if (zerodiag) { 9265 IS dummy; 9266 9267 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9268 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9269 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9270 PetscCall(ISDestroy(&dummy)); 9271 } 9272 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9273 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9274 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9275 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9276 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9277 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9278 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9279 PetscCall(VecReciprocal(vec_scale_P)); 9280 /* S_j */ 9281 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9282 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9283 9284 /* mimic vector in \widetilde{W}_\Gamma */ 9285 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9286 /* continuous in primal space */ 9287 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9288 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9289 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9290 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9291 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9292 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9293 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9294 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9295 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9296 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9297 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9298 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9299 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9300 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9301 9302 /* assemble rhs for coarse problem */ 9303 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9304 /* local with Schur */ 9305 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9306 if (zerodiag) { 9307 PetscCall(VecGetArray(dummy_vec,&array)); 9308 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9309 PetscCall(VecRestoreArray(dummy_vec,&array)); 9310 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9311 } 9312 /* sum on primal nodes the local contributions */ 9313 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9314 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9315 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9316 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9317 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9318 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9319 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9320 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9321 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9322 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9323 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9324 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9325 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9326 /* scale primal nodes (BDDC sums contibutions) */ 9327 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9328 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9329 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9330 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9331 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9332 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9333 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9334 /* global: \widetilde{B0}_B w_\Gamma */ 9335 if (zerodiag) { 9336 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9337 PetscCall(VecGetArray(dummy_vec,&array)); 9338 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9339 PetscCall(VecRestoreArray(dummy_vec,&array)); 9340 } 9341 /* BDDC */ 9342 PetscCall(VecSet(pcis->vec1_D,0.)); 9343 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9344 9345 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9346 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9347 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9348 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,(double)norm)); 9349 for (i=0;i<pcbddc->benign_n;i++) { 9350 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n",PetscGlobalRank,i,(double)PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9351 } 9352 PetscCall(PetscFree(p0_check)); 9353 PetscCall(VecDestroy(&vec_scale_P)); 9354 PetscCall(VecDestroy(&vec_check_B)); 9355 PetscCall(VecDestroy(&dummy_vec)); 9356 PetscCall(MatDestroy(&S_j)); 9357 PetscCall(MatDestroy(&B0_B)); 9358 } 9359 PetscFunctionReturn(0); 9360 } 9361 9362 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9363 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9364 { 9365 Mat At; 9366 IS rows; 9367 PetscInt rst,ren; 9368 PetscLayout rmap; 9369 9370 PetscFunctionBegin; 9371 rst = ren = 0; 9372 if (ccomm != MPI_COMM_NULL) { 9373 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9374 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9375 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9376 PetscCall(PetscLayoutSetUp(rmap)); 9377 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9378 } 9379 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9380 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9381 PetscCall(ISDestroy(&rows)); 9382 9383 if (ccomm != MPI_COMM_NULL) { 9384 Mat_MPIAIJ *a,*b; 9385 IS from,to; 9386 Vec gvec; 9387 PetscInt lsize; 9388 9389 PetscCall(MatCreate(ccomm,B)); 9390 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9391 PetscCall(MatSetType(*B,MATAIJ)); 9392 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9393 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9394 a = (Mat_MPIAIJ*)At->data; 9395 b = (Mat_MPIAIJ*)(*B)->data; 9396 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9397 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9398 PetscCall(PetscObjectReference((PetscObject)a->A)); 9399 PetscCall(PetscObjectReference((PetscObject)a->B)); 9400 b->A = a->A; 9401 b->B = a->B; 9402 9403 b->donotstash = a->donotstash; 9404 b->roworiented = a->roworiented; 9405 b->rowindices = NULL; 9406 b->rowvalues = NULL; 9407 b->getrowactive = PETSC_FALSE; 9408 9409 (*B)->rmap = rmap; 9410 (*B)->factortype = A->factortype; 9411 (*B)->assembled = PETSC_TRUE; 9412 (*B)->insertmode = NOT_SET_VALUES; 9413 (*B)->preallocated = PETSC_TRUE; 9414 9415 if (a->colmap) { 9416 #if defined(PETSC_USE_CTABLE) 9417 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9418 #else 9419 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9420 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9421 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9422 #endif 9423 } else b->colmap = NULL; 9424 if (a->garray) { 9425 PetscInt len; 9426 len = a->B->cmap->n; 9427 PetscCall(PetscMalloc1(len+1,&b->garray)); 9428 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9429 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9430 } else b->garray = NULL; 9431 9432 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9433 b->lvec = a->lvec; 9434 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9435 9436 /* cannot use VecScatterCopy */ 9437 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9438 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9439 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9440 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9441 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9442 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9443 PetscCall(ISDestroy(&from)); 9444 PetscCall(ISDestroy(&to)); 9445 PetscCall(VecDestroy(&gvec)); 9446 } 9447 PetscCall(MatDestroy(&At)); 9448 PetscFunctionReturn(0); 9449 } 9450