1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 CHKERRQ(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 CHKERRQ(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 CHKERRQ(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 CHKERRQ(PetscMalloc1(nr*nr,&U)); 46 CHKERRQ(PetscBLASIntCast(nr,&bM)); 47 CHKERRQ(PetscBLASIntCast(nc,&bN)); 48 CHKERRQ(PetscBLASIntCast(ulw,&lwork)); 49 CHKERRQ(MatDenseGetArray(A,&data)); 50 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 CHKERRQ(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 CHKERRQ(PetscFree(rwork2)); 57 #endif 58 CHKERRQ(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 CHKERRQ(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 CHKERRQ(PetscFree(sing)); 64 } 65 if (!work) { 66 CHKERRQ(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 CHKERRQ(MatDenseGetArray(*B,&data)); 72 CHKERRQ(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 CHKERRQ(MatDenseGetArray(*B,&data)); 76 CHKERRQ(PetscArraycpy(data,U,i*nr)); 77 } 78 CHKERRQ(MatDenseRestoreArray(*B,&data)); 79 CHKERRQ(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 CHKERRQ(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 CHKERRQ(ISGetSize(extrow,&rsize)); 99 CHKERRQ(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 CHKERRQ(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 CHKERRQ(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 CHKERRQ(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 CHKERRQ(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 CHKERRQ(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 CHKERRQ(MatDestroy(&GE)); 114 CHKERRQ(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 CHKERRQ(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 CHKERRQ(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 CHKERRQ(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 CHKERRQ(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 CHKERRQ(MatDenseRestoreArrayRead(GEd,&vals)); 130 CHKERRQ(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 CHKERRQ(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 CHKERRQ(MatView(GEc,viewer)); 140 CHKERRQ(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 CHKERRQ(MatView(*GKins,viewer)); 142 CHKERRQ(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 CHKERRQ(MatView(GEd,viewer)); 144 CHKERRQ(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 CHKERRQ(MatDestroy(&GEd)); 148 CHKERRQ(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 194 CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 195 CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 196 CHKERRQ(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 197 /* print debug info TODO: to be removed */ 198 CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 203 CHKERRQ(ISLocalToGlobalMappingGetSize(al2g,&n)); 204 CHKERRQ(PetscObjectGetComm((PetscObject)pc,&comm)); 205 if (!singular) { 206 CHKERRQ(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 CHKERRQ(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 215 CHKERRMPI(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 PetscCheckFalse(pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 CHKERRQ(ISGetLocalSize(nedfieldlocal,&ne)); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 CHKERRQ(PetscArrayzero(matis->sf_leafdata,n)); 233 CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 234 CHKERRQ(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 CHKERRQ(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 CHKERRQ(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 241 } 242 CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 244 CHKERRQ(PetscMalloc1(n,&idx)); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 CHKERRQ(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 PetscCheckFalse(!order && !conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 PetscCheckFalse(order && ne%order,PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 CHKERRQ(PetscMalloc1(ne,&eidxs)); 262 CHKERRQ(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 263 if (nedfieldlocal) { 264 CHKERRQ(ISGetIndices(nedfieldlocal,&idxs)); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs)); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 CHKERRQ(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 279 CHKERRQ(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 280 CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 281 CHKERRQ(PetscFree(eidxs)); 282 CHKERRQ(ISDestroy(&nedfieldlocal)); 283 CHKERRQ(ISDestroy(&enedfieldlocal)); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 CHKERRQ(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 CHKERRQ(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 295 CHKERRQ(ISLocalToGlobalMappingCreateIS(is,&al2g)); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 CHKERRQ(PetscObjectReference((PetscObject)al2g)); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 CHKERRQ(ISRenumber(is,NULL,NULL,&gis)); 304 CHKERRQ(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 305 CHKERRQ(ISDestroy(&gis)); 306 } 307 CHKERRQ(ISDestroy(&is)); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 CHKERRQ(PetscObjectReference((PetscObject)al2g)); 313 CHKERRQ(PetscObjectReference((PetscObject)al2g)); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 CHKERRQ(PetscArrayzero(matis->sf_leafdata,n)); 320 CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 321 if (nedfieldlocal) { 322 CHKERRQ(ISGetIndices(nedfieldlocal,&idxs)); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs)); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 CHKERRQ(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 CHKERRQ(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 CHKERRQ(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 333 CHKERRQ(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 334 if (global) { 335 PetscInt rst; 336 337 CHKERRQ(MatGetOwnershipRange(G,&rst,NULL)); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 CHKERRQ(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 344 CHKERRQ(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 345 } else { 346 PetscInt *tbz; 347 348 CHKERRQ(PetscMalloc1(ne,&tbz)); 349 CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 351 CHKERRQ(ISGetIndices(nedfieldlocal,&idxs)); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs)); 356 CHKERRQ(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 357 CHKERRQ(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 358 CHKERRQ(PetscFree(tbz)); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 CHKERRQ(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 367 CHKERRQ(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 368 CHKERRQ(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 369 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 370 CHKERRQ(ISDestroy(&lned)); 371 CHKERRQ(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 372 CHKERRQ(MatDestroy(&lGall)); 373 CHKERRQ(MatISGetLocalMat(lGis,&lG)); 374 375 /* SF for nodal dofs communications */ 376 CHKERRQ(MatGetLocalSize(G,NULL,&Lv)); 377 CHKERRQ(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 378 CHKERRQ(PetscObjectReference((PetscObject)vl2g)); 379 CHKERRQ(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 380 CHKERRQ(PetscSFCreate(comm,&sfv)); 381 CHKERRQ(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 382 CHKERRQ(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 383 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 384 i = singular ? 2 : 1; 385 CHKERRQ(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 CHKERRQ(PetscObjectReference((PetscObject)lG)); 389 CHKERRQ(MatDestroy(&lGis)); 390 CHKERRQ(MatDestroy(&G)); 391 392 if (print) { 393 CHKERRQ(PetscObjectSetName((PetscObject)lG,"initial_lG")); 394 CHKERRQ(MatView(lG,NULL)); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 CHKERRQ(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 CHKERRQ(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 402 CHKERRQ(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 403 CHKERRQ(PetscBTCreate(nv,&btv)); 404 CHKERRQ(PetscBTCreate(ne,&bte)); 405 CHKERRQ(PetscBTCreate(ne,&btb)); 406 CHKERRQ(PetscBTCreate(ne,&btbd)); 407 CHKERRQ(PetscBTCreate(nv,&btvcand)); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 CHKERRQ(ISGetLocalSize(is,&cum)); 419 CHKERRQ(ISGetIndices(is,&idxs)); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 CHKERRQ(PetscBTSet(btb,idxs[i])); 423 CHKERRQ(PetscBTSet(btbd,idxs[i])); 424 } 425 } 426 CHKERRQ(ISRestoreIndices(is,&idxs)); 427 if (fl2g) { 428 CHKERRQ(ISDestroy(&is)); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 CHKERRQ(ISGetLocalSize(is,&cum)); 440 CHKERRQ(ISGetIndices(is,&idxs)); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 CHKERRQ(PetscBTSet(btb,idxs[i])); 444 } 445 } 446 CHKERRQ(ISRestoreIndices(is,&idxs)); 447 if (fl2g) { 448 CHKERRQ(ISDestroy(&is)); 449 } 450 } 451 452 /* Count neighs per dof */ 453 CHKERRQ(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 454 CHKERRQ(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 455 456 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 457 for proper detection of coarse edges' endpoints */ 458 CHKERRQ(PetscBTCreate(ne,&btee)); 459 for (i=0;i<ne;i++) { 460 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 461 CHKERRQ(PetscBTSet(btee,i)); 462 } 463 } 464 CHKERRQ(PetscMalloc1(ne,&marks)); 465 if (!conforming) { 466 CHKERRQ(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 467 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 468 } 469 CHKERRQ(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 470 CHKERRQ(MatSeqAIJGetArray(lGe,&vals)); 471 cum = 0; 472 for (i=0;i<ne;i++) { 473 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 474 if (!PetscBTLookup(btee,i)) { 475 marks[cum++] = i; 476 continue; 477 } 478 /* set badly connected edge dofs as primal */ 479 if (!conforming) { 480 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 481 marks[cum++] = i; 482 CHKERRQ(PetscBTSet(bte,i)); 483 for (j=ii[i];j<ii[i+1];j++) { 484 CHKERRQ(PetscBTSet(btv,jj[j])); 485 } 486 } else { 487 /* every edge dofs should be connected trough a certain number of nodal dofs 488 to other edge dofs belonging to coarse edges 489 - at most 2 endpoints 490 - order-1 interior nodal dofs 491 - no undefined nodal dofs (nconn < order) 492 */ 493 PetscInt ends = 0,ints = 0, undef = 0; 494 for (j=ii[i];j<ii[i+1];j++) { 495 PetscInt v = jj[j],k; 496 PetscInt nconn = iit[v+1]-iit[v]; 497 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 498 if (nconn > order) ends++; 499 else if (nconn == order) ints++; 500 else undef++; 501 } 502 if (undef || ends > 2 || ints != order -1) { 503 marks[cum++] = i; 504 CHKERRQ(PetscBTSet(bte,i)); 505 for (j=ii[i];j<ii[i+1];j++) { 506 CHKERRQ(PetscBTSet(btv,jj[j])); 507 } 508 } 509 } 510 } 511 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 512 if (!order && ii[i+1] != ii[i]) { 513 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 514 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 515 } 516 } 517 CHKERRQ(PetscBTDestroy(&btee)); 518 CHKERRQ(MatSeqAIJRestoreArray(lGe,&vals)); 519 CHKERRQ(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 520 if (!conforming) { 521 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 522 CHKERRQ(MatDestroy(&lGt)); 523 } 524 CHKERRQ(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 525 526 /* identify splitpoints and corner candidates */ 527 CHKERRQ(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 528 if (print) { 529 CHKERRQ(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 530 CHKERRQ(MatView(lGe,NULL)); 531 CHKERRQ(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 532 CHKERRQ(MatView(lGt,NULL)); 533 } 534 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 535 CHKERRQ(MatSeqAIJGetArray(lGt,&vals)); 536 for (i=0;i<nv;i++) { 537 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 538 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 539 if (!order) { /* variable order */ 540 PetscReal vorder = 0.; 541 542 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 543 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 544 PetscCheckFalse(vorder-test > PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 545 ord = 1; 546 } 547 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 548 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 549 if (PetscBTLookup(btbd,jj[j])) { 550 bdir = PETSC_TRUE; 551 break; 552 } 553 if (vc != ecount[jj[j]]) { 554 sneighs = PETSC_FALSE; 555 } else { 556 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 557 for (k=0;k<vc;k++) { 558 if (vn[k] != en[k]) { 559 sneighs = PETSC_FALSE; 560 break; 561 } 562 } 563 } 564 } 565 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 566 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 567 CHKERRQ(PetscBTSet(btv,i)); 568 } else if (test == ord) { 569 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 570 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 571 CHKERRQ(PetscBTSet(btv,i)); 572 } else { 573 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 574 CHKERRQ(PetscBTSet(btvcand,i)); 575 } 576 } 577 } 578 CHKERRQ(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 579 CHKERRQ(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 580 CHKERRQ(PetscBTDestroy(&btbd)); 581 582 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 583 if (order != 1) { 584 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 585 CHKERRQ(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 586 for (i=0;i<nv;i++) { 587 if (PetscBTLookup(btvcand,i)) { 588 PetscBool found = PETSC_FALSE; 589 for (j=ii[i];j<ii[i+1] && !found;j++) { 590 PetscInt k,e = jj[j]; 591 if (PetscBTLookup(bte,e)) continue; 592 for (k=iit[e];k<iit[e+1];k++) { 593 PetscInt v = jjt[k]; 594 if (v != i && PetscBTLookup(btvcand,v)) { 595 found = PETSC_TRUE; 596 break; 597 } 598 } 599 } 600 if (!found) { 601 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 602 CHKERRQ(PetscBTClear(btvcand,i)); 603 } else { 604 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 605 } 606 } 607 } 608 CHKERRQ(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 609 } 610 CHKERRQ(MatSeqAIJRestoreArray(lGt,&vals)); 611 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 612 CHKERRQ(MatDestroy(&lGe)); 613 614 /* Get the local G^T explicitly */ 615 CHKERRQ(MatDestroy(&lGt)); 616 CHKERRQ(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 617 CHKERRQ(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 618 619 /* Mark interior nodal dofs */ 620 CHKERRQ(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 621 CHKERRQ(PetscBTCreate(nv,&btvi)); 622 for (i=1;i<n_neigh;i++) { 623 for (j=0;j<n_shared[i];j++) { 624 CHKERRQ(PetscBTSet(btvi,shared[i][j])); 625 } 626 } 627 CHKERRQ(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 628 629 /* communicate corners and splitpoints */ 630 CHKERRQ(PetscMalloc1(nv,&vmarks)); 631 CHKERRQ(PetscArrayzero(sfvleaves,nv)); 632 CHKERRQ(PetscArrayzero(sfvroots,Lv)); 633 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 634 635 if (print) { 636 IS tbz; 637 638 cum = 0; 639 for (i=0;i<nv;i++) 640 if (sfvleaves[i]) 641 vmarks[cum++] = i; 642 643 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 644 CHKERRQ(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 645 CHKERRQ(ISView(tbz,NULL)); 646 CHKERRQ(ISDestroy(&tbz)); 647 } 648 649 CHKERRQ(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 CHKERRQ(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 651 CHKERRQ(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 CHKERRQ(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 653 654 /* Zero rows of lGt corresponding to identified corners 655 and interior nodal dofs */ 656 cum = 0; 657 for (i=0;i<nv;i++) { 658 if (sfvleaves[i]) { 659 vmarks[cum++] = i; 660 CHKERRQ(PetscBTSet(btv,i)); 661 } 662 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 663 } 664 CHKERRQ(PetscBTDestroy(&btvi)); 665 if (print) { 666 IS tbz; 667 668 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 669 CHKERRQ(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 670 CHKERRQ(ISView(tbz,NULL)); 671 CHKERRQ(ISDestroy(&tbz)); 672 } 673 CHKERRQ(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 674 CHKERRQ(PetscFree(vmarks)); 675 CHKERRQ(PetscSFDestroy(&sfv)); 676 CHKERRQ(PetscFree2(sfvleaves,sfvroots)); 677 678 /* Recompute G */ 679 CHKERRQ(MatDestroy(&lG)); 680 CHKERRQ(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 681 if (print) { 682 CHKERRQ(PetscObjectSetName((PetscObject)lG,"used_lG")); 683 CHKERRQ(MatView(lG,NULL)); 684 CHKERRQ(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 685 CHKERRQ(MatView(lGt,NULL)); 686 } 687 688 /* Get primal dofs (if any) */ 689 cum = 0; 690 for (i=0;i<ne;i++) { 691 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 692 } 693 if (fl2g) { 694 CHKERRQ(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 695 } 696 CHKERRQ(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 697 if (print) { 698 CHKERRQ(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 699 CHKERRQ(ISView(primals,NULL)); 700 } 701 CHKERRQ(PetscBTDestroy(&bte)); 702 /* TODO: what if the user passed in some of them ? */ 703 CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 704 CHKERRQ(ISDestroy(&primals)); 705 706 /* Compute edge connectivity */ 707 CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 708 709 /* Symbolic conn = lG*lGt */ 710 CHKERRQ(MatProductCreate(lG,lGt,NULL,&conn)); 711 CHKERRQ(MatProductSetType(conn,MATPRODUCT_AB)); 712 CHKERRQ(MatProductSetAlgorithm(conn,"default")); 713 CHKERRQ(MatProductSetFill(conn,PETSC_DEFAULT)); 714 CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 715 CHKERRQ(MatProductSetFromOptions(conn)); 716 CHKERRQ(MatProductSymbolic(conn)); 717 718 CHKERRQ(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 719 if (fl2g) { 720 PetscBT btf; 721 PetscInt *iia,*jja,*iiu,*jju; 722 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 723 724 /* create CSR for all local dofs */ 725 CHKERRQ(PetscMalloc1(n+1,&iia)); 726 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 727 PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr != n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 728 iiu = pcbddc->mat_graph->xadj; 729 jju = pcbddc->mat_graph->adjncy; 730 } else if (pcbddc->use_local_adj) { 731 rest = PETSC_TRUE; 732 CHKERRQ(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 733 } else { 734 free = PETSC_TRUE; 735 CHKERRQ(PetscMalloc2(n+1,&iiu,n,&jju)); 736 iiu[0] = 0; 737 for (i=0;i<n;i++) { 738 iiu[i+1] = i+1; 739 jju[i] = -1; 740 } 741 } 742 743 /* import sizes of CSR */ 744 iia[0] = 0; 745 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 746 747 /* overwrite entries corresponding to the Nedelec field */ 748 CHKERRQ(PetscBTCreate(n,&btf)); 749 CHKERRQ(ISGetIndices(nedfieldlocal,&idxs)); 750 for (i=0;i<ne;i++) { 751 CHKERRQ(PetscBTSet(btf,idxs[i])); 752 iia[idxs[i]+1] = ii[i+1]-ii[i]; 753 } 754 755 /* iia in CSR */ 756 for (i=0;i<n;i++) iia[i+1] += iia[i]; 757 758 /* jja in CSR */ 759 CHKERRQ(PetscMalloc1(iia[n],&jja)); 760 for (i=0;i<n;i++) 761 if (!PetscBTLookup(btf,i)) 762 for (j=0;j<iiu[i+1]-iiu[i];j++) 763 jja[iia[i]+j] = jju[iiu[i]+j]; 764 765 /* map edge dofs connectivity */ 766 if (jj) { 767 CHKERRQ(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 768 for (i=0;i<ne;i++) { 769 PetscInt e = idxs[i]; 770 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 771 } 772 } 773 CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs)); 774 CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 775 if (rest) { 776 CHKERRQ(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 777 } 778 if (free) { 779 CHKERRQ(PetscFree2(iiu,jju)); 780 } 781 CHKERRQ(PetscBTDestroy(&btf)); 782 } else { 783 CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 784 } 785 786 /* Analyze interface for edge dofs */ 787 CHKERRQ(PCBDDCAnalyzeInterface(pc)); 788 pcbddc->mat_graph->twodim = PETSC_FALSE; 789 790 /* Get coarse edges in the edge space */ 791 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 792 CHKERRQ(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 793 794 if (fl2g) { 795 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 796 CHKERRQ(PetscMalloc1(nee,&eedges)); 797 for (i=0;i<nee;i++) { 798 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 799 } 800 } else { 801 eedges = alleedges; 802 primals = allprimals; 803 } 804 805 /* Mark fine edge dofs with their coarse edge id */ 806 CHKERRQ(PetscArrayzero(marks,ne)); 807 CHKERRQ(ISGetLocalSize(primals,&cum)); 808 CHKERRQ(ISGetIndices(primals,&idxs)); 809 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 810 CHKERRQ(ISRestoreIndices(primals,&idxs)); 811 if (print) { 812 CHKERRQ(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 813 CHKERRQ(ISView(primals,NULL)); 814 } 815 816 maxsize = 0; 817 for (i=0;i<nee;i++) { 818 PetscInt size,mark = i+1; 819 820 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 821 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 822 for (j=0;j<size;j++) marks[idxs[j]] = mark; 823 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 824 maxsize = PetscMax(maxsize,size); 825 } 826 827 /* Find coarse edge endpoints */ 828 CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 829 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 830 for (i=0;i<nee;i++) { 831 PetscInt mark = i+1,size; 832 833 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 834 if (!size && nedfieldlocal) continue; 835 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 836 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 837 if (print) { 838 CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i)); 839 CHKERRQ(ISView(eedges[i],NULL)); 840 } 841 for (j=0;j<size;j++) { 842 PetscInt k, ee = idxs[j]; 843 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 844 for (k=ii[ee];k<ii[ee+1];k++) { 845 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 846 if (PetscBTLookup(btv,jj[k])) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 848 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 849 PetscInt k2; 850 PetscBool corner = PETSC_FALSE; 851 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 852 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 853 /* it's a corner if either is connected with an edge dof belonging to a different cc or 854 if the edge dof lie on the natural part of the boundary */ 855 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 856 corner = PETSC_TRUE; 857 break; 858 } 859 } 860 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 861 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 862 CHKERRQ(PetscBTSet(btv,jj[k])); 863 } else { 864 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 865 } 866 } 867 } 868 } 869 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 870 } 871 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 872 CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 873 CHKERRQ(PetscBTDestroy(&btb)); 874 875 /* Reset marked primal dofs */ 876 CHKERRQ(ISGetLocalSize(primals,&cum)); 877 CHKERRQ(ISGetIndices(primals,&idxs)); 878 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 879 CHKERRQ(ISRestoreIndices(primals,&idxs)); 880 881 /* Now use the initial lG */ 882 CHKERRQ(MatDestroy(&lG)); 883 CHKERRQ(MatDestroy(&lGt)); 884 lG = lGinit; 885 CHKERRQ(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 886 887 /* Compute extended cols indices */ 888 CHKERRQ(PetscBTCreate(nv,&btvc)); 889 CHKERRQ(PetscBTCreate(nee,&bter)); 890 CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 891 CHKERRQ(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 892 i *= maxsize; 893 CHKERRQ(PetscCalloc1(nee,&extcols)); 894 CHKERRQ(PetscMalloc2(i,&extrow,i,&gidxs)); 895 eerr = PETSC_FALSE; 896 for (i=0;i<nee;i++) { 897 PetscInt size,found = 0; 898 899 cum = 0; 900 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 901 if (!size && nedfieldlocal) continue; 902 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 903 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 904 CHKERRQ(PetscBTMemzero(nv,btvc)); 905 for (j=0;j<size;j++) { 906 PetscInt k,ee = idxs[j]; 907 for (k=ii[ee];k<ii[ee+1];k++) { 908 PetscInt vv = jj[k]; 909 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 910 else if (!PetscBTLookupSet(btvc,vv)) found++; 911 } 912 } 913 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 914 CHKERRQ(PetscSortRemoveDupsInt(&cum,extrow)); 915 CHKERRQ(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 916 CHKERRQ(PetscSortIntWithArray(cum,gidxs,extrow)); 917 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 918 /* it may happen that endpoints are not defined at this point 919 if it is the case, mark this edge for a second pass */ 920 if (cum != size -1 || found != 2) { 921 CHKERRQ(PetscBTSet(bter,i)); 922 if (print) { 923 CHKERRQ(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 924 CHKERRQ(ISView(eedges[i],NULL)); 925 CHKERRQ(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 926 CHKERRQ(ISView(extcols[i],NULL)); 927 } 928 eerr = PETSC_TRUE; 929 } 930 } 931 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 932 CHKERRMPI(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 933 if (done) { 934 PetscInt *newprimals; 935 936 CHKERRQ(PetscMalloc1(ne,&newprimals)); 937 CHKERRQ(ISGetLocalSize(primals,&cum)); 938 CHKERRQ(ISGetIndices(primals,&idxs)); 939 CHKERRQ(PetscArraycpy(newprimals,idxs,cum)); 940 CHKERRQ(ISRestoreIndices(primals,&idxs)); 941 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 942 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 943 for (i=0;i<nee;i++) { 944 PetscBool has_candidates = PETSC_FALSE; 945 if (PetscBTLookup(bter,i)) { 946 PetscInt size,mark = i+1; 947 948 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 949 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 950 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 951 for (j=0;j<size;j++) { 952 PetscInt k,ee = idxs[j]; 953 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 954 for (k=ii[ee];k<ii[ee+1];k++) { 955 /* set all candidates located on the edge as corners */ 956 if (PetscBTLookup(btvcand,jj[k])) { 957 PetscInt k2,vv = jj[k]; 958 has_candidates = PETSC_TRUE; 959 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 960 CHKERRQ(PetscBTSet(btv,vv)); 961 /* set all edge dofs connected to candidate as primals */ 962 for (k2=iit[vv];k2<iit[vv+1];k2++) { 963 if (marks[jjt[k2]] == mark) { 964 PetscInt k3,ee2 = jjt[k2]; 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 966 newprimals[cum++] = ee2; 967 /* finally set the new corners */ 968 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 969 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 970 CHKERRQ(PetscBTSet(btv,jj[k3])); 971 } 972 } 973 } 974 } else { 975 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 976 } 977 } 978 } 979 if (!has_candidates) { /* circular edge */ 980 PetscInt k, ee = idxs[0],*tmarks; 981 982 CHKERRQ(PetscCalloc1(ne,&tmarks)); 983 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 984 for (k=ii[ee];k<ii[ee+1];k++) { 985 PetscInt k2; 986 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 987 CHKERRQ(PetscBTSet(btv,jj[k])); 988 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 989 } 990 for (j=0;j<size;j++) { 991 if (tmarks[idxs[j]] > 1) { 992 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 993 newprimals[cum++] = idxs[j]; 994 } 995 } 996 CHKERRQ(PetscFree(tmarks)); 997 } 998 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 999 } 1000 CHKERRQ(ISDestroy(&extcols[i])); 1001 } 1002 CHKERRQ(PetscFree(extcols)); 1003 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 1004 CHKERRQ(PetscSortRemoveDupsInt(&cum,newprimals)); 1005 if (fl2g) { 1006 CHKERRQ(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1007 CHKERRQ(ISDestroy(&primals)); 1008 for (i=0;i<nee;i++) { 1009 CHKERRQ(ISDestroy(&eedges[i])); 1010 } 1011 CHKERRQ(PetscFree(eedges)); 1012 } 1013 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1014 CHKERRQ(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1015 CHKERRQ(PetscFree(newprimals)); 1016 CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1017 CHKERRQ(ISDestroy(&primals)); 1018 CHKERRQ(PCBDDCAnalyzeInterface(pc)); 1019 pcbddc->mat_graph->twodim = PETSC_FALSE; 1020 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1021 if (fl2g) { 1022 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1023 CHKERRQ(PetscMalloc1(nee,&eedges)); 1024 for (i=0;i<nee;i++) { 1025 CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1026 } 1027 } else { 1028 eedges = alleedges; 1029 primals = allprimals; 1030 } 1031 CHKERRQ(PetscCalloc1(nee,&extcols)); 1032 1033 /* Mark again */ 1034 CHKERRQ(PetscArrayzero(marks,ne)); 1035 for (i=0;i<nee;i++) { 1036 PetscInt size,mark = i+1; 1037 1038 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 1039 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 1040 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1041 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 1042 } 1043 if (print) { 1044 CHKERRQ(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1045 CHKERRQ(ISView(primals,NULL)); 1046 } 1047 1048 /* Recompute extended cols */ 1049 eerr = PETSC_FALSE; 1050 for (i=0;i<nee;i++) { 1051 PetscInt size; 1052 1053 cum = 0; 1054 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 1055 if (!size && nedfieldlocal) continue; 1056 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1057 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 1058 for (j=0;j<size;j++) { 1059 PetscInt k,ee = idxs[j]; 1060 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1061 } 1062 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 1063 CHKERRQ(PetscSortRemoveDupsInt(&cum,extrow)); 1064 CHKERRQ(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1065 CHKERRQ(PetscSortIntWithArray(cum,gidxs,extrow)); 1066 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1067 if (cum != size -1) { 1068 if (print) { 1069 CHKERRQ(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1070 CHKERRQ(ISView(eedges[i],NULL)); 1071 CHKERRQ(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1072 CHKERRQ(ISView(extcols[i],NULL)); 1073 } 1074 eerr = PETSC_TRUE; 1075 } 1076 } 1077 } 1078 CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1079 CHKERRQ(PetscFree2(extrow,gidxs)); 1080 CHKERRQ(PetscBTDestroy(&bter)); 1081 if (print) CHKERRQ(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1082 /* an error should not occur at this point */ 1083 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1084 1085 /* Check the number of endpoints */ 1086 CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1087 CHKERRQ(PetscMalloc1(2*nee,&corners)); 1088 CHKERRQ(PetscMalloc1(nee,&cedges)); 1089 for (i=0;i<nee;i++) { 1090 PetscInt size, found = 0, gc[2]; 1091 1092 /* init with defaults */ 1093 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1094 CHKERRQ(ISGetLocalSize(eedges[i],&size)); 1095 if (!size && nedfieldlocal) continue; 1096 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1097 CHKERRQ(ISGetIndices(eedges[i],&idxs)); 1098 CHKERRQ(PetscBTMemzero(nv,btvc)); 1099 for (j=0;j<size;j++) { 1100 PetscInt k,ee = idxs[j]; 1101 for (k=ii[ee];k<ii[ee+1];k++) { 1102 PetscInt vv = jj[k]; 1103 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1104 PetscCheckFalse(found == 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1105 corners[i*2+found++] = vv; 1106 } 1107 } 1108 } 1109 if (found != 2) { 1110 PetscInt e; 1111 if (fl2g) { 1112 CHKERRQ(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1113 } else { 1114 e = idxs[0]; 1115 } 1116 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1117 } 1118 1119 /* get primal dof index on this coarse edge */ 1120 CHKERRQ(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1121 if (gc[0] > gc[1]) { 1122 PetscInt swap = corners[2*i]; 1123 corners[2*i] = corners[2*i+1]; 1124 corners[2*i+1] = swap; 1125 } 1126 cedges[i] = idxs[size-1]; 1127 CHKERRQ(ISRestoreIndices(eedges[i],&idxs)); 1128 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1129 } 1130 CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1131 CHKERRQ(PetscBTDestroy(&btvc)); 1132 1133 if (PetscDefined(USE_DEBUG)) { 1134 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1135 not interfere with neighbouring coarse edges */ 1136 CHKERRQ(PetscMalloc1(nee+1,&emarks)); 1137 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1138 for (i=0;i<nv;i++) { 1139 PetscInt emax = 0,eemax = 0; 1140 1141 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1142 CHKERRQ(PetscArrayzero(emarks,nee+1)); 1143 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1144 for (j=1;j<nee+1;j++) { 1145 if (emax < emarks[j]) { 1146 emax = emarks[j]; 1147 eemax = j; 1148 } 1149 } 1150 /* not relevant for edges */ 1151 if (!eemax) continue; 1152 1153 for (j=ii[i];j<ii[i+1];j++) { 1154 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1155 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1156 } 1157 } 1158 } 1159 CHKERRQ(PetscFree(emarks)); 1160 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1161 } 1162 1163 /* Compute extended rows indices for edge blocks of the change of basis */ 1164 CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1165 CHKERRQ(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1166 extmem *= maxsize; 1167 CHKERRQ(PetscMalloc1(extmem*nee,&extrow)); 1168 CHKERRQ(PetscMalloc1(nee,&extrows)); 1169 CHKERRQ(PetscCalloc1(nee,&extrowcum)); 1170 for (i=0;i<nv;i++) { 1171 PetscInt mark = 0,size,start; 1172 1173 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1174 for (j=ii[i];j<ii[i+1];j++) 1175 if (marks[jj[j]] && !mark) 1176 mark = marks[jj[j]]; 1177 1178 /* not relevant */ 1179 if (!mark) continue; 1180 1181 /* import extended row */ 1182 mark--; 1183 start = mark*extmem+extrowcum[mark]; 1184 size = ii[i+1]-ii[i]; 1185 PetscCheckFalse(extrowcum[mark] + size > extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1186 CHKERRQ(PetscArraycpy(extrow+start,jj+ii[i],size)); 1187 extrowcum[mark] += size; 1188 } 1189 CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1190 CHKERRQ(MatDestroy(&lGt)); 1191 CHKERRQ(PetscFree(marks)); 1192 1193 /* Compress extrows */ 1194 cum = 0; 1195 for (i=0;i<nee;i++) { 1196 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1197 CHKERRQ(PetscSortRemoveDupsInt(&size,start)); 1198 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1199 cum = PetscMax(cum,size); 1200 } 1201 CHKERRQ(PetscFree(extrowcum)); 1202 CHKERRQ(PetscBTDestroy(&btv)); 1203 CHKERRQ(PetscBTDestroy(&btvcand)); 1204 1205 /* Workspace for lapack inner calls and VecSetValues */ 1206 CHKERRQ(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1207 1208 /* Create change of basis matrix (preallocation can be improved) */ 1209 CHKERRQ(MatCreate(comm,&T)); 1210 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1211 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1212 CHKERRQ(MatSetType(T,MATAIJ)); 1213 CHKERRQ(MatSeqAIJSetPreallocation(T,10,NULL)); 1214 CHKERRQ(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1215 CHKERRQ(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1216 CHKERRQ(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1217 CHKERRQ(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1218 CHKERRQ(ISLocalToGlobalMappingDestroy(&al2g)); 1219 1220 /* Defaults to identity */ 1221 CHKERRQ(MatCreateVecs(pc->pmat,&tvec,NULL)); 1222 CHKERRQ(VecSet(tvec,1.0)); 1223 CHKERRQ(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1224 CHKERRQ(VecDestroy(&tvec)); 1225 1226 /* Create discrete gradient for the coarser level if needed */ 1227 CHKERRQ(MatDestroy(&pcbddc->nedcG)); 1228 CHKERRQ(ISDestroy(&pcbddc->nedclocal)); 1229 if (pcbddc->current_level < pcbddc->max_levels) { 1230 ISLocalToGlobalMapping cel2g,cvl2g; 1231 IS wis,gwis; 1232 PetscInt cnv,cne; 1233 1234 CHKERRQ(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1235 if (fl2g) { 1236 CHKERRQ(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1237 } else { 1238 CHKERRQ(PetscObjectReference((PetscObject)wis)); 1239 pcbddc->nedclocal = wis; 1240 } 1241 CHKERRQ(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1242 CHKERRQ(ISDestroy(&wis)); 1243 CHKERRQ(ISRenumber(gwis,NULL,&cne,&wis)); 1244 CHKERRQ(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1245 CHKERRQ(ISDestroy(&wis)); 1246 CHKERRQ(ISDestroy(&gwis)); 1247 1248 CHKERRQ(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1249 CHKERRQ(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1250 CHKERRQ(ISDestroy(&wis)); 1251 CHKERRQ(ISRenumber(gwis,NULL,&cnv,&wis)); 1252 CHKERRQ(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1253 CHKERRQ(ISDestroy(&wis)); 1254 CHKERRQ(ISDestroy(&gwis)); 1255 1256 CHKERRQ(MatCreate(comm,&pcbddc->nedcG)); 1257 CHKERRQ(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1258 CHKERRQ(MatSetType(pcbddc->nedcG,MATAIJ)); 1259 CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1260 CHKERRQ(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1261 CHKERRQ(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1262 CHKERRQ(ISLocalToGlobalMappingDestroy(&cel2g)); 1263 CHKERRQ(ISLocalToGlobalMappingDestroy(&cvl2g)); 1264 } 1265 CHKERRQ(ISLocalToGlobalMappingDestroy(&vl2g)); 1266 1267 #if defined(PRINT_GDET) 1268 inc = 0; 1269 lev = pcbddc->current_level; 1270 #endif 1271 1272 /* Insert values in the change of basis matrix */ 1273 for (i=0;i<nee;i++) { 1274 Mat Gins = NULL, GKins = NULL; 1275 IS cornersis = NULL; 1276 PetscScalar cvals[2]; 1277 1278 if (pcbddc->nedcG) { 1279 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1280 } 1281 CHKERRQ(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1282 if (Gins && GKins) { 1283 const PetscScalar *data; 1284 const PetscInt *rows,*cols; 1285 PetscInt nrh,nch,nrc,ncc; 1286 1287 CHKERRQ(ISGetIndices(eedges[i],&cols)); 1288 /* H1 */ 1289 CHKERRQ(ISGetIndices(extrows[i],&rows)); 1290 CHKERRQ(MatGetSize(Gins,&nrh,&nch)); 1291 CHKERRQ(MatDenseGetArrayRead(Gins,&data)); 1292 CHKERRQ(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1293 CHKERRQ(MatDenseRestoreArrayRead(Gins,&data)); 1294 CHKERRQ(ISRestoreIndices(extrows[i],&rows)); 1295 /* complement */ 1296 CHKERRQ(MatGetSize(GKins,&nrc,&ncc)); 1297 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1298 PetscCheckFalse(ncc + nch != nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1299 PetscCheckFalse(ncc != 1 && pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1300 CHKERRQ(MatDenseGetArrayRead(GKins,&data)); 1301 CHKERRQ(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1302 CHKERRQ(MatDenseRestoreArrayRead(GKins,&data)); 1303 1304 /* coarse discrete gradient */ 1305 if (pcbddc->nedcG) { 1306 PetscInt cols[2]; 1307 1308 cols[0] = 2*i; 1309 cols[1] = 2*i+1; 1310 CHKERRQ(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1311 } 1312 CHKERRQ(ISRestoreIndices(eedges[i],&cols)); 1313 } 1314 CHKERRQ(ISDestroy(&extrows[i])); 1315 CHKERRQ(ISDestroy(&extcols[i])); 1316 CHKERRQ(ISDestroy(&cornersis)); 1317 CHKERRQ(MatDestroy(&Gins)); 1318 CHKERRQ(MatDestroy(&GKins)); 1319 } 1320 CHKERRQ(ISLocalToGlobalMappingDestroy(&el2g)); 1321 1322 /* Start assembling */ 1323 CHKERRQ(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1324 if (pcbddc->nedcG) { 1325 CHKERRQ(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1326 } 1327 1328 /* Free */ 1329 if (fl2g) { 1330 CHKERRQ(ISDestroy(&primals)); 1331 for (i=0;i<nee;i++) { 1332 CHKERRQ(ISDestroy(&eedges[i])); 1333 } 1334 CHKERRQ(PetscFree(eedges)); 1335 } 1336 1337 /* hack mat_graph with primal dofs on the coarse edges */ 1338 { 1339 PCBDDCGraph graph = pcbddc->mat_graph; 1340 PetscInt *oqueue = graph->queue; 1341 PetscInt *ocptr = graph->cptr; 1342 PetscInt ncc,*idxs; 1343 1344 /* find first primal edge */ 1345 if (pcbddc->nedclocal) { 1346 CHKERRQ(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1347 } else { 1348 if (fl2g) { 1349 CHKERRQ(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1350 } 1351 idxs = cedges; 1352 } 1353 cum = 0; 1354 while (cum < nee && cedges[cum] < 0) cum++; 1355 1356 /* adapt connected components */ 1357 CHKERRQ(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1358 graph->cptr[0] = 0; 1359 for (i=0,ncc=0;i<graph->ncc;i++) { 1360 PetscInt lc = ocptr[i+1]-ocptr[i]; 1361 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1362 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1363 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1364 ncc++; 1365 lc--; 1366 cum++; 1367 while (cum < nee && cedges[cum] < 0) cum++; 1368 } 1369 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1370 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1371 ncc++; 1372 } 1373 graph->ncc = ncc; 1374 if (pcbddc->nedclocal) { 1375 CHKERRQ(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1376 } 1377 CHKERRQ(PetscFree2(ocptr,oqueue)); 1378 } 1379 CHKERRQ(ISLocalToGlobalMappingDestroy(&fl2g)); 1380 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1381 CHKERRQ(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1382 CHKERRQ(MatDestroy(&conn)); 1383 1384 CHKERRQ(ISDestroy(&nedfieldlocal)); 1385 CHKERRQ(PetscFree(extrow)); 1386 CHKERRQ(PetscFree2(work,rwork)); 1387 CHKERRQ(PetscFree(corners)); 1388 CHKERRQ(PetscFree(cedges)); 1389 CHKERRQ(PetscFree(extrows)); 1390 CHKERRQ(PetscFree(extcols)); 1391 CHKERRQ(MatDestroy(&lG)); 1392 1393 /* Complete assembling */ 1394 CHKERRQ(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1395 if (pcbddc->nedcG) { 1396 CHKERRQ(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1397 #if 0 1398 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1399 CHKERRQ(MatView(pcbddc->nedcG,NULL)); 1400 #endif 1401 } 1402 1403 /* set change of basis */ 1404 CHKERRQ(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1405 CHKERRQ(MatDestroy(&T)); 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /* the near-null space of BDDC carries information on quadrature weights, 1411 and these can be collinear -> so cheat with MatNullSpaceCreate 1412 and create a suitable set of basis vectors first */ 1413 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1414 { 1415 PetscInt i; 1416 1417 PetscFunctionBegin; 1418 for (i=0;i<nvecs;i++) { 1419 PetscInt first,last; 1420 1421 CHKERRQ(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1422 PetscCheckFalse(last-first < 2*nvecs && has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1423 if (i>=first && i < last) { 1424 PetscScalar *data; 1425 CHKERRQ(VecGetArray(quad_vecs[i],&data)); 1426 if (!has_const) { 1427 data[i-first] = 1.; 1428 } else { 1429 data[2*i-first] = 1./PetscSqrtReal(2.); 1430 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1431 } 1432 CHKERRQ(VecRestoreArray(quad_vecs[i],&data)); 1433 } 1434 CHKERRQ(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1435 } 1436 CHKERRQ(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1437 for (i=0;i<nvecs;i++) { /* reset vectors */ 1438 PetscInt first,last; 1439 CHKERRQ(VecLockReadPop(quad_vecs[i])); 1440 CHKERRQ(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1441 if (i>=first && i < last) { 1442 PetscScalar *data; 1443 CHKERRQ(VecGetArray(quad_vecs[i],&data)); 1444 if (!has_const) { 1445 data[i-first] = 0.; 1446 } else { 1447 data[2*i-first] = 0.; 1448 data[2*i-first+1] = 0.; 1449 } 1450 CHKERRQ(VecRestoreArray(quad_vecs[i],&data)); 1451 } 1452 CHKERRQ(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1453 CHKERRQ(VecLockReadPush(quad_vecs[i])); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1459 { 1460 Mat loc_divudotp; 1461 Vec p,v,vins,quad_vec,*quad_vecs; 1462 ISLocalToGlobalMapping map; 1463 PetscScalar *vals; 1464 const PetscScalar *array; 1465 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1466 PetscInt n_neigh,*neigh,*n_shared,**shared; 1467 PetscMPIInt rank; 1468 1469 PetscFunctionBegin; 1470 CHKERRQ(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1471 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1472 CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1473 if (!maxneighs) { 1474 CHKERRQ(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1475 *nnsp = NULL; 1476 PetscFunctionReturn(0); 1477 } 1478 maxsize = 0; 1479 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1480 CHKERRQ(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1481 /* create vectors to hold quadrature weights */ 1482 CHKERRQ(MatCreateVecs(A,&quad_vec,NULL)); 1483 if (!transpose) { 1484 CHKERRQ(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1485 } else { 1486 CHKERRQ(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1487 } 1488 CHKERRQ(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1489 CHKERRQ(VecDestroy(&quad_vec)); 1490 CHKERRQ(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1491 for (i=0;i<maxneighs;i++) { 1492 CHKERRQ(VecLockReadPop(quad_vecs[i])); 1493 } 1494 1495 /* compute local quad vec */ 1496 CHKERRQ(MatISGetLocalMat(divudotp,&loc_divudotp)); 1497 if (!transpose) { 1498 CHKERRQ(MatCreateVecs(loc_divudotp,&v,&p)); 1499 } else { 1500 CHKERRQ(MatCreateVecs(loc_divudotp,&p,&v)); 1501 } 1502 CHKERRQ(VecSet(p,1.)); 1503 if (!transpose) { 1504 CHKERRQ(MatMultTranspose(loc_divudotp,p,v)); 1505 } else { 1506 CHKERRQ(MatMult(loc_divudotp,p,v)); 1507 } 1508 if (vl2l) { 1509 Mat lA; 1510 VecScatter sc; 1511 1512 CHKERRQ(MatISGetLocalMat(A,&lA)); 1513 CHKERRQ(MatCreateVecs(lA,&vins,NULL)); 1514 CHKERRQ(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1515 CHKERRQ(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1516 CHKERRQ(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1517 CHKERRQ(VecScatterDestroy(&sc)); 1518 } else { 1519 vins = v; 1520 } 1521 CHKERRQ(VecGetArrayRead(vins,&array)); 1522 CHKERRQ(VecDestroy(&p)); 1523 1524 /* insert in global quadrature vecs */ 1525 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1526 for (i=1;i<n_neigh;i++) { 1527 const PetscInt *idxs; 1528 PetscInt idx,nn,j; 1529 1530 idxs = shared[i]; 1531 nn = n_shared[i]; 1532 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1533 CHKERRQ(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1534 idx = -(idx+1); 1535 PetscCheckFalse(idx < 0 || idx >= maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1536 CHKERRQ(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1537 CHKERRQ(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1538 } 1539 CHKERRQ(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1540 CHKERRQ(VecRestoreArrayRead(vins,&array)); 1541 if (vl2l) { 1542 CHKERRQ(VecDestroy(&vins)); 1543 } 1544 CHKERRQ(VecDestroy(&v)); 1545 CHKERRQ(PetscFree2(gidxs,vals)); 1546 1547 /* assemble near null space */ 1548 for (i=0;i<maxneighs;i++) { 1549 CHKERRQ(VecAssemblyBegin(quad_vecs[i])); 1550 } 1551 for (i=0;i<maxneighs;i++) { 1552 CHKERRQ(VecAssemblyEnd(quad_vecs[i])); 1553 CHKERRQ(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1554 CHKERRQ(VecLockReadPush(quad_vecs[i])); 1555 } 1556 CHKERRQ(VecDestroyVecs(maxneighs,&quad_vecs)); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1561 { 1562 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1563 1564 PetscFunctionBegin; 1565 if (primalv) { 1566 if (pcbddc->user_primal_vertices_local) { 1567 IS list[2], newp; 1568 1569 list[0] = primalv; 1570 list[1] = pcbddc->user_primal_vertices_local; 1571 CHKERRQ(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1572 CHKERRQ(ISSortRemoveDups(newp)); 1573 CHKERRQ(ISDestroy(&list[1])); 1574 pcbddc->user_primal_vertices_local = newp; 1575 } else { 1576 CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1577 } 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1583 { 1584 PetscInt f, *comp = (PetscInt *)ctx; 1585 1586 PetscFunctionBegin; 1587 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1592 { 1593 PetscErrorCode ierr; 1594 Vec local,global; 1595 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1596 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1597 PetscBool monolithic = PETSC_FALSE; 1598 1599 PetscFunctionBegin; 1600 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1601 CHKERRQ(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1602 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1603 /* need to convert from global to local topology information and remove references to information in global ordering */ 1604 CHKERRQ(MatCreateVecs(pc->pmat,&global,NULL)); 1605 CHKERRQ(MatCreateVecs(matis->A,&local,NULL)); 1606 CHKERRQ(VecBindToCPU(global,PETSC_TRUE)); 1607 CHKERRQ(VecBindToCPU(local,PETSC_TRUE)); 1608 if (monolithic) { /* just get block size to properly compute vertices */ 1609 if (pcbddc->vertex_size == 1) { 1610 CHKERRQ(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1611 } 1612 goto boundary; 1613 } 1614 1615 if (pcbddc->user_provided_isfordofs) { 1616 if (pcbddc->n_ISForDofs) { 1617 PetscInt i; 1618 1619 CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1620 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1621 PetscInt bs; 1622 1623 CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1624 CHKERRQ(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1625 CHKERRQ(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1626 CHKERRQ(ISDestroy(&pcbddc->ISForDofs[i])); 1627 } 1628 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1629 pcbddc->n_ISForDofs = 0; 1630 CHKERRQ(PetscFree(pcbddc->ISForDofs)); 1631 } 1632 } else { 1633 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1634 DM dm; 1635 1636 CHKERRQ(MatGetDM(pc->pmat, &dm)); 1637 if (!dm) { 1638 CHKERRQ(PCGetDM(pc, &dm)); 1639 } 1640 if (dm) { 1641 IS *fields; 1642 PetscInt nf,i; 1643 1644 CHKERRQ(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1645 CHKERRQ(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1646 for (i=0;i<nf;i++) { 1647 PetscInt bs; 1648 1649 CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1650 CHKERRQ(ISGetBlockSize(fields[i],&bs)); 1651 CHKERRQ(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1652 CHKERRQ(ISDestroy(&fields[i])); 1653 } 1654 CHKERRQ(PetscFree(fields)); 1655 pcbddc->n_ISForDofsLocal = nf; 1656 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1657 PetscContainer c; 1658 1659 CHKERRQ(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1660 if (c) { 1661 MatISLocalFields lf; 1662 CHKERRQ(PetscContainerGetPointer(c,(void**)&lf)); 1663 CHKERRQ(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1664 } else { /* fallback, create the default fields if bs > 1 */ 1665 PetscInt i, n = matis->A->rmap->n; 1666 CHKERRQ(MatGetBlockSize(pc->pmat,&i)); 1667 if (i > 1) { 1668 pcbddc->n_ISForDofsLocal = i; 1669 CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1670 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1671 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1672 } 1673 } 1674 } 1675 } 1676 } else { 1677 PetscInt i; 1678 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1679 CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1680 } 1681 } 1682 } 1683 1684 boundary: 1685 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1686 CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1687 } else if (pcbddc->DirichletBoundariesLocal) { 1688 CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1689 } 1690 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1691 CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1692 } else if (pcbddc->NeumannBoundariesLocal) { 1693 CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1694 } 1695 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1696 CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1697 } 1698 CHKERRQ(VecDestroy(&global)); 1699 CHKERRQ(VecDestroy(&local)); 1700 /* detect local disconnected subdomains if requested (use matis->A) */ 1701 if (pcbddc->detect_disconnected) { 1702 IS primalv = NULL; 1703 PetscInt i; 1704 PetscBool filter = pcbddc->detect_disconnected_filter; 1705 1706 for (i=0;i<pcbddc->n_local_subs;i++) { 1707 CHKERRQ(ISDestroy(&pcbddc->local_subs[i])); 1708 } 1709 CHKERRQ(PetscFree(pcbddc->local_subs)); 1710 CHKERRQ(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1711 CHKERRQ(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1712 CHKERRQ(ISDestroy(&primalv)); 1713 } 1714 /* early stage corner detection */ 1715 { 1716 DM dm; 1717 1718 CHKERRQ(MatGetDM(pc->pmat,&dm)); 1719 if (!dm) { 1720 CHKERRQ(PCGetDM(pc,&dm)); 1721 } 1722 if (dm) { 1723 PetscBool isda; 1724 1725 CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1726 if (isda) { 1727 ISLocalToGlobalMapping l2l; 1728 IS corners; 1729 Mat lA; 1730 PetscBool gl,lo; 1731 1732 { 1733 Vec cvec; 1734 const PetscScalar *coords; 1735 PetscInt dof,n,cdim; 1736 PetscBool memc = PETSC_TRUE; 1737 1738 CHKERRQ(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1739 CHKERRQ(DMGetCoordinates(dm,&cvec)); 1740 CHKERRQ(VecGetLocalSize(cvec,&n)); 1741 CHKERRQ(VecGetBlockSize(cvec,&cdim)); 1742 n /= cdim; 1743 CHKERRQ(PetscFree(pcbddc->mat_graph->coords)); 1744 CHKERRQ(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1745 CHKERRQ(VecGetArrayRead(cvec,&coords)); 1746 #if defined(PETSC_USE_COMPLEX) 1747 memc = PETSC_FALSE; 1748 #endif 1749 if (dof != 1) memc = PETSC_FALSE; 1750 if (memc) { 1751 CHKERRQ(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1752 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1753 PetscReal *bcoords = pcbddc->mat_graph->coords; 1754 PetscInt i, b, d; 1755 1756 for (i=0;i<n;i++) { 1757 for (b=0;b<dof;b++) { 1758 for (d=0;d<cdim;d++) { 1759 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1760 } 1761 } 1762 } 1763 } 1764 CHKERRQ(VecRestoreArrayRead(cvec,&coords)); 1765 pcbddc->mat_graph->cdim = cdim; 1766 pcbddc->mat_graph->cnloc = dof*n; 1767 pcbddc->mat_graph->cloc = PETSC_FALSE; 1768 } 1769 CHKERRQ(DMDAGetSubdomainCornersIS(dm,&corners)); 1770 CHKERRQ(MatISGetLocalMat(pc->pmat,&lA)); 1771 CHKERRQ(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1772 CHKERRQ(MatISRestoreLocalMat(pc->pmat,&lA)); 1773 lo = (PetscBool)(l2l && corners); 1774 CHKERRMPI(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1775 if (gl) { /* From PETSc's DMDA */ 1776 const PetscInt *idx; 1777 PetscInt dof,bs,*idxout,n; 1778 1779 CHKERRQ(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1780 CHKERRQ(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1781 CHKERRQ(ISGetLocalSize(corners,&n)); 1782 CHKERRQ(ISGetIndices(corners,&idx)); 1783 if (bs == dof) { 1784 CHKERRQ(PetscMalloc1(n,&idxout)); 1785 CHKERRQ(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1786 } else { /* the original DMDA local-to-local map have been modified */ 1787 PetscInt i,d; 1788 1789 CHKERRQ(PetscMalloc1(dof*n,&idxout)); 1790 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1791 CHKERRQ(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1792 1793 bs = 1; 1794 n *= dof; 1795 } 1796 CHKERRQ(ISRestoreIndices(corners,&idx)); 1797 CHKERRQ(DMDARestoreSubdomainCornersIS(dm,&corners)); 1798 CHKERRQ(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1799 CHKERRQ(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1800 CHKERRQ(ISDestroy(&corners)); 1801 pcbddc->corner_selected = PETSC_TRUE; 1802 pcbddc->corner_selection = PETSC_TRUE; 1803 } 1804 if (corners) { 1805 CHKERRQ(DMDARestoreSubdomainCornersIS(dm,&corners)); 1806 } 1807 } 1808 } 1809 } 1810 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1811 DM dm; 1812 1813 CHKERRQ(MatGetDM(pc->pmat,&dm)); 1814 if (!dm) { 1815 CHKERRQ(PCGetDM(pc,&dm)); 1816 } 1817 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1818 Vec vcoords; 1819 PetscSection section; 1820 PetscReal *coords; 1821 PetscInt d,cdim,nl,nf,**ctxs; 1822 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1823 1824 CHKERRQ(DMGetCoordinateDim(dm,&cdim)); 1825 CHKERRQ(DMGetLocalSection(dm,§ion)); 1826 CHKERRQ(PetscSectionGetNumFields(section,&nf)); 1827 CHKERRQ(DMCreateGlobalVector(dm,&vcoords)); 1828 CHKERRQ(VecGetLocalSize(vcoords,&nl)); 1829 CHKERRQ(PetscMalloc1(nl*cdim,&coords)); 1830 CHKERRQ(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1831 CHKERRQ(PetscMalloc1(nf,&ctxs[0])); 1832 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1833 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1834 for (d=0;d<cdim;d++) { 1835 PetscInt i; 1836 const PetscScalar *v; 1837 1838 for (i=0;i<nf;i++) ctxs[i][0] = d; 1839 CHKERRQ(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1840 CHKERRQ(VecGetArrayRead(vcoords,&v)); 1841 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1842 CHKERRQ(VecRestoreArrayRead(vcoords,&v)); 1843 } 1844 CHKERRQ(VecDestroy(&vcoords)); 1845 CHKERRQ(PCSetCoordinates(pc,cdim,nl,coords)); 1846 CHKERRQ(PetscFree(coords)); 1847 CHKERRQ(PetscFree(ctxs[0])); 1848 CHKERRQ(PetscFree2(funcs,ctxs)); 1849 } 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1855 { 1856 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1857 IS nis; 1858 const PetscInt *idxs; 1859 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1860 1861 PetscFunctionBegin; 1862 PetscCheckFalse(mop != MPI_LAND && mop != MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1863 if (mop == MPI_LAND) { 1864 /* init rootdata with true */ 1865 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1866 } else { 1867 CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1868 } 1869 CHKERRQ(PetscArrayzero(matis->sf_leafdata,n)); 1870 CHKERRQ(ISGetLocalSize(*is,&nd)); 1871 CHKERRQ(ISGetIndices(*is,&idxs)); 1872 for (i=0;i<nd;i++) 1873 if (-1 < idxs[i] && idxs[i] < n) 1874 matis->sf_leafdata[idxs[i]] = 1; 1875 CHKERRQ(ISRestoreIndices(*is,&idxs)); 1876 CHKERRQ(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1877 CHKERRQ(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1878 CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1879 CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1880 if (mop == MPI_LAND) { 1881 CHKERRQ(PetscMalloc1(nd,&nidxs)); 1882 } else { 1883 CHKERRQ(PetscMalloc1(n,&nidxs)); 1884 } 1885 for (i=0,nnd=0;i<n;i++) 1886 if (matis->sf_leafdata[i]) 1887 nidxs[nnd++] = i; 1888 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1889 CHKERRQ(ISDestroy(is)); 1890 *is = nis; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1895 { 1896 PC_IS *pcis = (PC_IS*)(pc->data); 1897 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1898 1899 PetscFunctionBegin; 1900 if (!pcbddc->benign_have_null) { 1901 PetscFunctionReturn(0); 1902 } 1903 if (pcbddc->ChangeOfBasisMatrix) { 1904 Vec swap; 1905 1906 CHKERRQ(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1907 swap = pcbddc->work_change; 1908 pcbddc->work_change = r; 1909 r = swap; 1910 } 1911 CHKERRQ(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1912 CHKERRQ(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1913 CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1914 CHKERRQ(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1915 CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1916 CHKERRQ(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1917 CHKERRQ(VecSet(z,0.)); 1918 CHKERRQ(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1919 CHKERRQ(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1920 if (pcbddc->ChangeOfBasisMatrix) { 1921 pcbddc->work_change = r; 1922 CHKERRQ(VecCopy(z,pcbddc->work_change)); 1923 CHKERRQ(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1929 { 1930 PCBDDCBenignMatMult_ctx ctx; 1931 PetscBool apply_right,apply_left,reset_x; 1932 1933 PetscFunctionBegin; 1934 CHKERRQ(MatShellGetContext(A,&ctx)); 1935 if (transpose) { 1936 apply_right = ctx->apply_left; 1937 apply_left = ctx->apply_right; 1938 } else { 1939 apply_right = ctx->apply_right; 1940 apply_left = ctx->apply_left; 1941 } 1942 reset_x = PETSC_FALSE; 1943 if (apply_right) { 1944 const PetscScalar *ax; 1945 PetscInt nl,i; 1946 1947 CHKERRQ(VecGetLocalSize(x,&nl)); 1948 CHKERRQ(VecGetArrayRead(x,&ax)); 1949 CHKERRQ(PetscArraycpy(ctx->work,ax,nl)); 1950 CHKERRQ(VecRestoreArrayRead(x,&ax)); 1951 for (i=0;i<ctx->benign_n;i++) { 1952 PetscScalar sum,val; 1953 const PetscInt *idxs; 1954 PetscInt nz,j; 1955 CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1956 CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1957 sum = 0.; 1958 if (ctx->apply_p0) { 1959 val = ctx->work[idxs[nz-1]]; 1960 for (j=0;j<nz-1;j++) { 1961 sum += ctx->work[idxs[j]]; 1962 ctx->work[idxs[j]] += val; 1963 } 1964 } else { 1965 for (j=0;j<nz-1;j++) { 1966 sum += ctx->work[idxs[j]]; 1967 } 1968 } 1969 ctx->work[idxs[nz-1]] -= sum; 1970 CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1971 } 1972 CHKERRQ(VecPlaceArray(x,ctx->work)); 1973 reset_x = PETSC_TRUE; 1974 } 1975 if (transpose) { 1976 CHKERRQ(MatMultTranspose(ctx->A,x,y)); 1977 } else { 1978 CHKERRQ(MatMult(ctx->A,x,y)); 1979 } 1980 if (reset_x) { 1981 CHKERRQ(VecResetArray(x)); 1982 } 1983 if (apply_left) { 1984 PetscScalar *ay; 1985 PetscInt i; 1986 1987 CHKERRQ(VecGetArray(y,&ay)); 1988 for (i=0;i<ctx->benign_n;i++) { 1989 PetscScalar sum,val; 1990 const PetscInt *idxs; 1991 PetscInt nz,j; 1992 CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1993 CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1994 val = -ay[idxs[nz-1]]; 1995 if (ctx->apply_p0) { 1996 sum = 0.; 1997 for (j=0;j<nz-1;j++) { 1998 sum += ay[idxs[j]]; 1999 ay[idxs[j]] += val; 2000 } 2001 ay[idxs[nz-1]] += sum; 2002 } else { 2003 for (j=0;j<nz-1;j++) { 2004 ay[idxs[j]] += val; 2005 } 2006 ay[idxs[nz-1]] = 0.; 2007 } 2008 CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2009 } 2010 CHKERRQ(VecRestoreArray(y,&ay)); 2011 } 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2016 { 2017 PetscFunctionBegin; 2018 CHKERRQ(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2023 { 2024 PetscFunctionBegin; 2025 CHKERRQ(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2030 { 2031 PC_IS *pcis = (PC_IS*)pc->data; 2032 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2033 PCBDDCBenignMatMult_ctx ctx; 2034 2035 PetscFunctionBegin; 2036 if (!restore) { 2037 Mat A_IB,A_BI; 2038 PetscScalar *work; 2039 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2040 2041 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2042 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2043 CHKERRQ(PetscMalloc1(pcis->n,&work)); 2044 CHKERRQ(MatCreate(PETSC_COMM_SELF,&A_IB)); 2045 CHKERRQ(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2046 CHKERRQ(MatSetType(A_IB,MATSHELL)); 2047 CHKERRQ(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2048 CHKERRQ(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2049 CHKERRQ(PetscNew(&ctx)); 2050 CHKERRQ(MatShellSetContext(A_IB,ctx)); 2051 ctx->apply_left = PETSC_TRUE; 2052 ctx->apply_right = PETSC_FALSE; 2053 ctx->apply_p0 = PETSC_FALSE; 2054 ctx->benign_n = pcbddc->benign_n; 2055 if (reuse) { 2056 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2057 ctx->free = PETSC_FALSE; 2058 } else { /* TODO: could be optimized for successive solves */ 2059 ISLocalToGlobalMapping N_to_D; 2060 PetscInt i; 2061 2062 CHKERRQ(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2063 CHKERRQ(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2064 for (i=0;i<pcbddc->benign_n;i++) { 2065 CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2066 } 2067 CHKERRQ(ISLocalToGlobalMappingDestroy(&N_to_D)); 2068 ctx->free = PETSC_TRUE; 2069 } 2070 ctx->A = pcis->A_IB; 2071 ctx->work = work; 2072 CHKERRQ(MatSetUp(A_IB)); 2073 CHKERRQ(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2074 CHKERRQ(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2075 pcis->A_IB = A_IB; 2076 2077 /* A_BI as A_IB^T */ 2078 CHKERRQ(MatCreateTranspose(A_IB,&A_BI)); 2079 pcbddc->benign_original_mat = pcis->A_BI; 2080 pcis->A_BI = A_BI; 2081 } else { 2082 if (!pcbddc->benign_original_mat) { 2083 PetscFunctionReturn(0); 2084 } 2085 CHKERRQ(MatShellGetContext(pcis->A_IB,&ctx)); 2086 CHKERRQ(MatDestroy(&pcis->A_IB)); 2087 pcis->A_IB = ctx->A; 2088 ctx->A = NULL; 2089 CHKERRQ(MatDestroy(&pcis->A_BI)); 2090 pcis->A_BI = pcbddc->benign_original_mat; 2091 pcbddc->benign_original_mat = NULL; 2092 if (ctx->free) { 2093 PetscInt i; 2094 for (i=0;i<ctx->benign_n;i++) { 2095 CHKERRQ(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2096 } 2097 CHKERRQ(PetscFree(ctx->benign_zerodiag_subs)); 2098 } 2099 CHKERRQ(PetscFree(ctx->work)); 2100 CHKERRQ(PetscFree(ctx)); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /* used just in bddc debug mode */ 2106 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2107 { 2108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2110 Mat An; 2111 2112 PetscFunctionBegin; 2113 CHKERRQ(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2114 CHKERRQ(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2115 if (is1) { 2116 CHKERRQ(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2117 CHKERRQ(MatDestroy(&An)); 2118 } else { 2119 *B = An; 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /* TODO: add reuse flag */ 2125 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2126 { 2127 Mat Bt; 2128 PetscScalar *a,*bdata; 2129 const PetscInt *ii,*ij; 2130 PetscInt m,n,i,nnz,*bii,*bij; 2131 PetscBool flg_row; 2132 2133 PetscFunctionBegin; 2134 CHKERRQ(MatGetSize(A,&n,&m)); 2135 CHKERRQ(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2136 CHKERRQ(MatSeqAIJGetArray(A,&a)); 2137 nnz = n; 2138 for (i=0;i<ii[n];i++) { 2139 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2140 } 2141 CHKERRQ(PetscMalloc1(n+1,&bii)); 2142 CHKERRQ(PetscMalloc1(nnz,&bij)); 2143 CHKERRQ(PetscMalloc1(nnz,&bdata)); 2144 nnz = 0; 2145 bii[0] = 0; 2146 for (i=0;i<n;i++) { 2147 PetscInt j; 2148 for (j=ii[i];j<ii[i+1];j++) { 2149 PetscScalar entry = a[j]; 2150 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2151 bij[nnz] = ij[j]; 2152 bdata[nnz] = entry; 2153 nnz++; 2154 } 2155 } 2156 bii[i+1] = nnz; 2157 } 2158 CHKERRQ(MatSeqAIJRestoreArray(A,&a)); 2159 CHKERRQ(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2160 CHKERRQ(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2161 { 2162 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2163 b->free_a = PETSC_TRUE; 2164 b->free_ij = PETSC_TRUE; 2165 } 2166 if (*B == A) { 2167 CHKERRQ(MatDestroy(&A)); 2168 } 2169 *B = Bt; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2174 { 2175 Mat B = NULL; 2176 DM dm; 2177 IS is_dummy,*cc_n; 2178 ISLocalToGlobalMapping l2gmap_dummy; 2179 PCBDDCGraph graph; 2180 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2181 PetscInt i,n; 2182 PetscInt *xadj,*adjncy; 2183 PetscBool isplex = PETSC_FALSE; 2184 2185 PetscFunctionBegin; 2186 if (ncc) *ncc = 0; 2187 if (cc) *cc = NULL; 2188 if (primalv) *primalv = NULL; 2189 CHKERRQ(PCBDDCGraphCreate(&graph)); 2190 CHKERRQ(MatGetDM(pc->pmat,&dm)); 2191 if (!dm) { 2192 CHKERRQ(PCGetDM(pc,&dm)); 2193 } 2194 if (dm) { 2195 CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2196 } 2197 if (filter) isplex = PETSC_FALSE; 2198 2199 if (isplex) { /* this code has been modified from plexpartition.c */ 2200 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2201 PetscInt *adj = NULL; 2202 IS cellNumbering; 2203 const PetscInt *cellNum; 2204 PetscBool useCone, useClosure; 2205 PetscSection section; 2206 PetscSegBuffer adjBuffer; 2207 PetscSF sfPoint; 2208 2209 CHKERRQ(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2210 CHKERRQ(DMGetPointSF(dm, &sfPoint)); 2211 CHKERRQ(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2212 /* Build adjacency graph via a section/segbuffer */ 2213 CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2214 CHKERRQ(PetscSectionSetChart(section, pStart, pEnd)); 2215 CHKERRQ(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2216 /* Always use FVM adjacency to create partitioner graph */ 2217 CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2218 CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2219 CHKERRQ(DMPlexGetCellNumbering(dm, &cellNumbering)); 2220 CHKERRQ(ISGetIndices(cellNumbering, &cellNum)); 2221 for (n = 0, p = pStart; p < pEnd; p++) { 2222 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2223 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2224 adjSize = PETSC_DETERMINE; 2225 CHKERRQ(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2226 for (a = 0; a < adjSize; ++a) { 2227 const PetscInt point = adj[a]; 2228 if (pStart <= point && point < pEnd) { 2229 PetscInt *PETSC_RESTRICT pBuf; 2230 CHKERRQ(PetscSectionAddDof(section, p, 1)); 2231 CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2232 *pBuf = point; 2233 } 2234 } 2235 n++; 2236 } 2237 CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure)); 2238 /* Derive CSR graph from section/segbuffer */ 2239 CHKERRQ(PetscSectionSetUp(section)); 2240 CHKERRQ(PetscSectionGetStorageSize(section, &size)); 2241 CHKERRQ(PetscMalloc1(n+1, &xadj)); 2242 for (idx = 0, p = pStart; p < pEnd; p++) { 2243 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2244 CHKERRQ(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2245 } 2246 xadj[n] = size; 2247 CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2248 /* Clean up */ 2249 CHKERRQ(PetscSegBufferDestroy(&adjBuffer)); 2250 CHKERRQ(PetscSectionDestroy(§ion)); 2251 CHKERRQ(PetscFree(adj)); 2252 graph->xadj = xadj; 2253 graph->adjncy = adjncy; 2254 } else { 2255 Mat A; 2256 PetscBool isseqaij, flg_row; 2257 2258 CHKERRQ(MatISGetLocalMat(pc->pmat,&A)); 2259 if (!A->rmap->N || !A->cmap->N) { 2260 CHKERRQ(PCBDDCGraphDestroy(&graph)); 2261 PetscFunctionReturn(0); 2262 } 2263 CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2264 if (!isseqaij && filter) { 2265 PetscBool isseqdense; 2266 2267 CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2268 if (!isseqdense) { 2269 CHKERRQ(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2270 } else { /* TODO: rectangular case and LDA */ 2271 PetscScalar *array; 2272 PetscReal chop=1.e-6; 2273 2274 CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2275 CHKERRQ(MatDenseGetArray(B,&array)); 2276 CHKERRQ(MatGetSize(B,&n,NULL)); 2277 for (i=0;i<n;i++) { 2278 PetscInt j; 2279 for (j=i+1;j<n;j++) { 2280 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2281 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2282 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2283 } 2284 } 2285 CHKERRQ(MatDenseRestoreArray(B,&array)); 2286 CHKERRQ(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2287 } 2288 } else { 2289 CHKERRQ(PetscObjectReference((PetscObject)A)); 2290 B = A; 2291 } 2292 CHKERRQ(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2293 2294 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2295 if (filter) { 2296 PetscScalar *data; 2297 PetscInt j,cum; 2298 2299 CHKERRQ(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2300 CHKERRQ(MatSeqAIJGetArray(B,&data)); 2301 cum = 0; 2302 for (i=0;i<n;i++) { 2303 PetscInt t; 2304 2305 for (j=xadj[i];j<xadj[i+1];j++) { 2306 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2307 continue; 2308 } 2309 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2310 } 2311 t = xadj_filtered[i]; 2312 xadj_filtered[i] = cum; 2313 cum += t; 2314 } 2315 CHKERRQ(MatSeqAIJRestoreArray(B,&data)); 2316 graph->xadj = xadj_filtered; 2317 graph->adjncy = adjncy_filtered; 2318 } else { 2319 graph->xadj = xadj; 2320 graph->adjncy = adjncy; 2321 } 2322 } 2323 /* compute local connected components using PCBDDCGraph */ 2324 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2325 CHKERRQ(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2326 CHKERRQ(ISDestroy(&is_dummy)); 2327 CHKERRQ(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2328 CHKERRQ(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2329 CHKERRQ(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2330 CHKERRQ(PCBDDCGraphComputeConnectedComponents(graph)); 2331 2332 /* partial clean up */ 2333 CHKERRQ(PetscFree2(xadj_filtered,adjncy_filtered)); 2334 if (B) { 2335 PetscBool flg_row; 2336 CHKERRQ(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2337 CHKERRQ(MatDestroy(&B)); 2338 } 2339 if (isplex) { 2340 CHKERRQ(PetscFree(xadj)); 2341 CHKERRQ(PetscFree(adjncy)); 2342 } 2343 2344 /* get back data */ 2345 if (isplex) { 2346 if (ncc) *ncc = graph->ncc; 2347 if (cc || primalv) { 2348 Mat A; 2349 PetscBT btv,btvt; 2350 PetscSection subSection; 2351 PetscInt *ids,cum,cump,*cids,*pids; 2352 2353 CHKERRQ(DMPlexGetSubdomainSection(dm,&subSection)); 2354 CHKERRQ(MatISGetLocalMat(pc->pmat,&A)); 2355 CHKERRQ(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2356 CHKERRQ(PetscBTCreate(A->rmap->n,&btv)); 2357 CHKERRQ(PetscBTCreate(A->rmap->n,&btvt)); 2358 2359 cids[0] = 0; 2360 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2361 PetscInt j; 2362 2363 CHKERRQ(PetscBTMemzero(A->rmap->n,btvt)); 2364 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2365 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2366 2367 CHKERRQ(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2368 for (k = 0; k < 2*size; k += 2) { 2369 PetscInt s, pp, p = closure[k], off, dof, cdof; 2370 2371 CHKERRQ(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2372 CHKERRQ(PetscSectionGetOffset(subSection,p,&off)); 2373 CHKERRQ(PetscSectionGetDof(subSection,p,&dof)); 2374 for (s = 0; s < dof-cdof; s++) { 2375 if (PetscBTLookupSet(btvt,off+s)) continue; 2376 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2377 else pids[cump++] = off+s; /* cross-vertex */ 2378 } 2379 CHKERRQ(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2380 if (pp != p) { 2381 CHKERRQ(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2382 CHKERRQ(PetscSectionGetOffset(subSection,pp,&off)); 2383 CHKERRQ(PetscSectionGetDof(subSection,pp,&dof)); 2384 for (s = 0; s < dof-cdof; s++) { 2385 if (PetscBTLookupSet(btvt,off+s)) continue; 2386 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2387 else pids[cump++] = off+s; /* cross-vertex */ 2388 } 2389 } 2390 } 2391 CHKERRQ(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2392 } 2393 cids[i+1] = cum; 2394 /* mark dofs as already assigned */ 2395 for (j = cids[i]; j < cids[i+1]; j++) { 2396 CHKERRQ(PetscBTSet(btv,ids[j])); 2397 } 2398 } 2399 if (cc) { 2400 CHKERRQ(PetscMalloc1(graph->ncc,&cc_n)); 2401 for (i = 0; i < graph->ncc; i++) { 2402 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2403 } 2404 *cc = cc_n; 2405 } 2406 if (primalv) { 2407 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2408 } 2409 CHKERRQ(PetscFree3(ids,cids,pids)); 2410 CHKERRQ(PetscBTDestroy(&btv)); 2411 CHKERRQ(PetscBTDestroy(&btvt)); 2412 } 2413 } else { 2414 if (ncc) *ncc = graph->ncc; 2415 if (cc) { 2416 CHKERRQ(PetscMalloc1(graph->ncc,&cc_n)); 2417 for (i=0;i<graph->ncc;i++) { 2418 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2419 } 2420 *cc = cc_n; 2421 } 2422 } 2423 /* clean up graph */ 2424 graph->xadj = NULL; 2425 graph->adjncy = NULL; 2426 CHKERRQ(PCBDDCGraphDestroy(&graph)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2431 { 2432 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2433 PC_IS* pcis = (PC_IS*)(pc->data); 2434 IS dirIS = NULL; 2435 PetscInt i; 2436 2437 PetscFunctionBegin; 2438 CHKERRQ(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2439 if (zerodiag) { 2440 Mat A; 2441 Vec vec3_N; 2442 PetscScalar *vals; 2443 const PetscInt *idxs; 2444 PetscInt nz,*count; 2445 2446 /* p0 */ 2447 CHKERRQ(VecSet(pcis->vec1_N,0.)); 2448 CHKERRQ(PetscMalloc1(pcis->n,&vals)); 2449 CHKERRQ(ISGetLocalSize(zerodiag,&nz)); 2450 CHKERRQ(ISGetIndices(zerodiag,&idxs)); 2451 for (i=0;i<nz;i++) vals[i] = 1.; 2452 CHKERRQ(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2453 CHKERRQ(VecAssemblyBegin(pcis->vec1_N)); 2454 CHKERRQ(VecAssemblyEnd(pcis->vec1_N)); 2455 /* v_I */ 2456 CHKERRQ(VecSetRandom(pcis->vec2_N,NULL)); 2457 for (i=0;i<nz;i++) vals[i] = 0.; 2458 CHKERRQ(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2459 CHKERRQ(ISRestoreIndices(zerodiag,&idxs)); 2460 CHKERRQ(ISGetIndices(pcis->is_B_local,&idxs)); 2461 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2462 CHKERRQ(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2463 CHKERRQ(ISRestoreIndices(pcis->is_B_local,&idxs)); 2464 if (dirIS) { 2465 PetscInt n; 2466 2467 CHKERRQ(ISGetLocalSize(dirIS,&n)); 2468 CHKERRQ(ISGetIndices(dirIS,&idxs)); 2469 for (i=0;i<n;i++) vals[i] = 0.; 2470 CHKERRQ(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2471 CHKERRQ(ISRestoreIndices(dirIS,&idxs)); 2472 } 2473 CHKERRQ(VecAssemblyBegin(pcis->vec2_N)); 2474 CHKERRQ(VecAssemblyEnd(pcis->vec2_N)); 2475 CHKERRQ(VecDuplicate(pcis->vec1_N,&vec3_N)); 2476 CHKERRQ(VecSet(vec3_N,0.)); 2477 CHKERRQ(MatISGetLocalMat(pc->pmat,&A)); 2478 CHKERRQ(MatMult(A,pcis->vec1_N,vec3_N)); 2479 CHKERRQ(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2480 PetscCheckFalse(PetscAbsScalar(vals[0]) > 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2481 CHKERRQ(PetscFree(vals)); 2482 CHKERRQ(VecDestroy(&vec3_N)); 2483 2484 /* there should not be any pressure dofs lying on the interface */ 2485 CHKERRQ(PetscCalloc1(pcis->n,&count)); 2486 CHKERRQ(ISGetIndices(pcis->is_B_local,&idxs)); 2487 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2488 CHKERRQ(ISRestoreIndices(pcis->is_B_local,&idxs)); 2489 CHKERRQ(ISGetIndices(zerodiag,&idxs)); 2490 for (i=0;i<nz;i++) PetscCheckFalse(count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2491 CHKERRQ(ISRestoreIndices(zerodiag,&idxs)); 2492 CHKERRQ(PetscFree(count)); 2493 } 2494 CHKERRQ(ISDestroy(&dirIS)); 2495 2496 /* check PCBDDCBenignGetOrSetP0 */ 2497 CHKERRQ(VecSetRandom(pcis->vec1_global,NULL)); 2498 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2499 CHKERRQ(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2500 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2501 CHKERRQ(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2502 for (i=0;i<pcbddc->benign_n;i++) { 2503 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2504 PetscCheckFalse(val != -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2505 } 2506 PetscFunctionReturn(0); 2507 } 2508 2509 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2510 { 2511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2512 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2513 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2514 PetscInt nz,n,benign_n,bsp = 1; 2515 PetscInt *interior_dofs,n_interior_dofs,nneu; 2516 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 if (reuse) goto project_b0; 2521 CHKERRQ(PetscSFDestroy(&pcbddc->benign_sf)); 2522 CHKERRQ(MatDestroy(&pcbddc->benign_B0)); 2523 for (n=0;n<pcbddc->benign_n;n++) { 2524 CHKERRQ(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2525 } 2526 CHKERRQ(PetscFree(pcbddc->benign_zerodiag_subs)); 2527 has_null_pressures = PETSC_TRUE; 2528 have_null = PETSC_TRUE; 2529 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2530 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2531 Checks if all the pressure dofs in each subdomain have a zero diagonal 2532 If not, a change of basis on pressures is not needed 2533 since the local Schur complements are already SPD 2534 */ 2535 if (pcbddc->n_ISForDofsLocal) { 2536 IS iP = NULL; 2537 PetscInt p,*pp; 2538 PetscBool flg; 2539 2540 CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2541 n = pcbddc->n_ISForDofsLocal; 2542 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2543 CHKERRQ(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2544 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2545 if (!flg) { 2546 n = 1; 2547 pp[0] = pcbddc->n_ISForDofsLocal-1; 2548 } 2549 2550 bsp = 0; 2551 for (p=0;p<n;p++) { 2552 PetscInt bs; 2553 2554 PetscCheckFalse(pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2555 CHKERRQ(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2556 bsp += bs; 2557 } 2558 CHKERRQ(PetscMalloc1(bsp,&bzerodiag)); 2559 bsp = 0; 2560 for (p=0;p<n;p++) { 2561 const PetscInt *idxs; 2562 PetscInt b,bs,npl,*bidxs; 2563 2564 CHKERRQ(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2565 CHKERRQ(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2566 CHKERRQ(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2567 CHKERRQ(PetscMalloc1(npl/bs,&bidxs)); 2568 for (b=0;b<bs;b++) { 2569 PetscInt i; 2570 2571 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2572 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2573 bsp++; 2574 } 2575 CHKERRQ(PetscFree(bidxs)); 2576 CHKERRQ(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2577 } 2578 CHKERRQ(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2579 2580 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2581 CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2582 if (iP) { 2583 IS newpressures; 2584 2585 CHKERRQ(ISDifference(pressures,iP,&newpressures)); 2586 CHKERRQ(ISDestroy(&pressures)); 2587 pressures = newpressures; 2588 } 2589 CHKERRQ(ISSorted(pressures,&sorted)); 2590 if (!sorted) { 2591 CHKERRQ(ISSort(pressures)); 2592 } 2593 CHKERRQ(PetscFree(pp)); 2594 } 2595 2596 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2597 CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2598 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2599 CHKERRQ(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2600 CHKERRQ(ISSorted(zerodiag,&sorted)); 2601 if (!sorted) { 2602 CHKERRQ(ISSort(zerodiag)); 2603 } 2604 CHKERRQ(PetscObjectReference((PetscObject)zerodiag)); 2605 zerodiag_save = zerodiag; 2606 CHKERRQ(ISGetLocalSize(zerodiag,&nz)); 2607 if (!nz) { 2608 if (n) have_null = PETSC_FALSE; 2609 has_null_pressures = PETSC_FALSE; 2610 CHKERRQ(ISDestroy(&zerodiag)); 2611 } 2612 recompute_zerodiag = PETSC_FALSE; 2613 2614 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2615 zerodiag_subs = NULL; 2616 benign_n = 0; 2617 n_interior_dofs = 0; 2618 interior_dofs = NULL; 2619 nneu = 0; 2620 if (pcbddc->NeumannBoundariesLocal) { 2621 CHKERRQ(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2622 } 2623 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2624 if (checkb) { /* need to compute interior nodes */ 2625 PetscInt n,i,j; 2626 PetscInt n_neigh,*neigh,*n_shared,**shared; 2627 PetscInt *iwork; 2628 2629 CHKERRQ(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2630 CHKERRQ(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2631 CHKERRQ(PetscCalloc1(n,&iwork)); 2632 CHKERRQ(PetscMalloc1(n,&interior_dofs)); 2633 for (i=1;i<n_neigh;i++) 2634 for (j=0;j<n_shared[i];j++) 2635 iwork[shared[i][j]] += 1; 2636 for (i=0;i<n;i++) 2637 if (!iwork[i]) 2638 interior_dofs[n_interior_dofs++] = i; 2639 CHKERRQ(PetscFree(iwork)); 2640 CHKERRQ(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2641 } 2642 if (has_null_pressures) { 2643 IS *subs; 2644 PetscInt nsubs,i,j,nl; 2645 const PetscInt *idxs; 2646 PetscScalar *array; 2647 Vec *work; 2648 2649 subs = pcbddc->local_subs; 2650 nsubs = pcbddc->n_local_subs; 2651 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2652 if (checkb) { 2653 CHKERRQ(VecDuplicateVecs(matis->y,2,&work)); 2654 CHKERRQ(ISGetLocalSize(zerodiag,&nl)); 2655 CHKERRQ(ISGetIndices(zerodiag,&idxs)); 2656 /* work[0] = 1_p */ 2657 CHKERRQ(VecSet(work[0],0.)); 2658 CHKERRQ(VecGetArray(work[0],&array)); 2659 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2660 CHKERRQ(VecRestoreArray(work[0],&array)); 2661 /* work[0] = 1_v */ 2662 CHKERRQ(VecSet(work[1],1.)); 2663 CHKERRQ(VecGetArray(work[1],&array)); 2664 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2665 CHKERRQ(VecRestoreArray(work[1],&array)); 2666 CHKERRQ(ISRestoreIndices(zerodiag,&idxs)); 2667 } 2668 2669 if (nsubs > 1 || bsp > 1) { 2670 IS *is; 2671 PetscInt b,totb; 2672 2673 totb = bsp; 2674 is = bsp > 1 ? bzerodiag : &zerodiag; 2675 nsubs = PetscMax(nsubs,1); 2676 CHKERRQ(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2677 for (b=0;b<totb;b++) { 2678 for (i=0;i<nsubs;i++) { 2679 ISLocalToGlobalMapping l2g; 2680 IS t_zerodiag_subs; 2681 PetscInt nl; 2682 2683 if (subs) { 2684 CHKERRQ(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2685 } else { 2686 IS tis; 2687 2688 CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2689 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2690 CHKERRQ(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2691 CHKERRQ(ISDestroy(&tis)); 2692 } 2693 CHKERRQ(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2694 CHKERRQ(ISGetLocalSize(t_zerodiag_subs,&nl)); 2695 if (nl) { 2696 PetscBool valid = PETSC_TRUE; 2697 2698 if (checkb) { 2699 CHKERRQ(VecSet(matis->x,0)); 2700 CHKERRQ(ISGetLocalSize(subs[i],&nl)); 2701 CHKERRQ(ISGetIndices(subs[i],&idxs)); 2702 CHKERRQ(VecGetArray(matis->x,&array)); 2703 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2704 CHKERRQ(VecRestoreArray(matis->x,&array)); 2705 CHKERRQ(ISRestoreIndices(subs[i],&idxs)); 2706 CHKERRQ(VecPointwiseMult(matis->x,work[0],matis->x)); 2707 CHKERRQ(MatMult(matis->A,matis->x,matis->y)); 2708 CHKERRQ(VecPointwiseMult(matis->y,work[1],matis->y)); 2709 CHKERRQ(VecGetArray(matis->y,&array)); 2710 for (j=0;j<n_interior_dofs;j++) { 2711 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2712 valid = PETSC_FALSE; 2713 break; 2714 } 2715 } 2716 CHKERRQ(VecRestoreArray(matis->y,&array)); 2717 } 2718 if (valid && nneu) { 2719 const PetscInt *idxs; 2720 PetscInt nzb; 2721 2722 CHKERRQ(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2723 CHKERRQ(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2724 CHKERRQ(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2725 if (nzb) valid = PETSC_FALSE; 2726 } 2727 if (valid && pressures) { 2728 IS t_pressure_subs,tmp; 2729 PetscInt i1,i2; 2730 2731 CHKERRQ(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2732 CHKERRQ(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2733 CHKERRQ(ISGetLocalSize(tmp,&i1)); 2734 CHKERRQ(ISGetLocalSize(t_zerodiag_subs,&i2)); 2735 if (i2 != i1) valid = PETSC_FALSE; 2736 CHKERRQ(ISDestroy(&t_pressure_subs)); 2737 CHKERRQ(ISDestroy(&tmp)); 2738 } 2739 if (valid) { 2740 CHKERRQ(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2741 benign_n++; 2742 } else recompute_zerodiag = PETSC_TRUE; 2743 } 2744 CHKERRQ(ISDestroy(&t_zerodiag_subs)); 2745 CHKERRQ(ISLocalToGlobalMappingDestroy(&l2g)); 2746 } 2747 } 2748 } else { /* there's just one subdomain (or zero if they have not been detected */ 2749 PetscBool valid = PETSC_TRUE; 2750 2751 if (nneu) valid = PETSC_FALSE; 2752 if (valid && pressures) { 2753 CHKERRQ(ISEqual(pressures,zerodiag,&valid)); 2754 } 2755 if (valid && checkb) { 2756 CHKERRQ(MatMult(matis->A,work[0],matis->x)); 2757 CHKERRQ(VecPointwiseMult(matis->x,work[1],matis->x)); 2758 CHKERRQ(VecGetArray(matis->x,&array)); 2759 for (j=0;j<n_interior_dofs;j++) { 2760 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2761 valid = PETSC_FALSE; 2762 break; 2763 } 2764 } 2765 CHKERRQ(VecRestoreArray(matis->x,&array)); 2766 } 2767 if (valid) { 2768 benign_n = 1; 2769 CHKERRQ(PetscMalloc1(benign_n,&zerodiag_subs)); 2770 CHKERRQ(PetscObjectReference((PetscObject)zerodiag)); 2771 zerodiag_subs[0] = zerodiag; 2772 } 2773 } 2774 if (checkb) { 2775 CHKERRQ(VecDestroyVecs(2,&work)); 2776 } 2777 } 2778 CHKERRQ(PetscFree(interior_dofs)); 2779 2780 if (!benign_n) { 2781 PetscInt n; 2782 2783 CHKERRQ(ISDestroy(&zerodiag)); 2784 recompute_zerodiag = PETSC_FALSE; 2785 CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2786 if (n) have_null = PETSC_FALSE; 2787 } 2788 2789 /* final check for null pressures */ 2790 if (zerodiag && pressures) { 2791 CHKERRQ(ISEqual(pressures,zerodiag,&have_null)); 2792 } 2793 2794 if (recompute_zerodiag) { 2795 CHKERRQ(ISDestroy(&zerodiag)); 2796 if (benign_n == 1) { 2797 CHKERRQ(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2798 zerodiag = zerodiag_subs[0]; 2799 } else { 2800 PetscInt i,nzn,*new_idxs; 2801 2802 nzn = 0; 2803 for (i=0;i<benign_n;i++) { 2804 PetscInt ns; 2805 CHKERRQ(ISGetLocalSize(zerodiag_subs[i],&ns)); 2806 nzn += ns; 2807 } 2808 CHKERRQ(PetscMalloc1(nzn,&new_idxs)); 2809 nzn = 0; 2810 for (i=0;i<benign_n;i++) { 2811 PetscInt ns,*idxs; 2812 CHKERRQ(ISGetLocalSize(zerodiag_subs[i],&ns)); 2813 CHKERRQ(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2814 CHKERRQ(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2815 CHKERRQ(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2816 nzn += ns; 2817 } 2818 CHKERRQ(PetscSortInt(nzn,new_idxs)); 2819 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2820 } 2821 have_null = PETSC_FALSE; 2822 } 2823 2824 /* determines if the coarse solver will be singular or not */ 2825 CHKERRMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2826 2827 /* Prepare matrix to compute no-net-flux */ 2828 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2829 Mat A,loc_divudotp; 2830 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2831 IS row,col,isused = NULL; 2832 PetscInt M,N,n,st,n_isused; 2833 2834 if (pressures) { 2835 isused = pressures; 2836 } else { 2837 isused = zerodiag_save; 2838 } 2839 CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2840 CHKERRQ(MatISGetLocalMat(pc->pmat,&A)); 2841 CHKERRQ(MatGetLocalSize(A,&n,NULL)); 2842 PetscCheckFalse(!isused && n,PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2843 n_isused = 0; 2844 if (isused) { 2845 CHKERRQ(ISGetLocalSize(isused,&n_isused)); 2846 } 2847 CHKERRMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2848 st = st-n_isused; 2849 if (n) { 2850 const PetscInt *gidxs; 2851 2852 CHKERRQ(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2853 CHKERRQ(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2854 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2855 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2856 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2857 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2858 } else { 2859 CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2860 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2861 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2862 } 2863 CHKERRQ(MatGetSize(pc->pmat,NULL,&N)); 2864 CHKERRQ(ISGetSize(row,&M)); 2865 CHKERRQ(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2866 CHKERRQ(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2867 CHKERRQ(ISDestroy(&row)); 2868 CHKERRQ(ISDestroy(&col)); 2869 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2870 CHKERRQ(MatSetType(pcbddc->divudotp,MATIS)); 2871 CHKERRQ(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2872 CHKERRQ(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2873 CHKERRQ(ISLocalToGlobalMappingDestroy(&rl2g)); 2874 CHKERRQ(ISLocalToGlobalMappingDestroy(&cl2g)); 2875 CHKERRQ(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2876 CHKERRQ(MatDestroy(&loc_divudotp)); 2877 CHKERRQ(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2878 CHKERRQ(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2879 } 2880 CHKERRQ(ISDestroy(&zerodiag_save)); 2881 CHKERRQ(ISDestroy(&pressures)); 2882 if (bzerodiag) { 2883 PetscInt i; 2884 2885 for (i=0;i<bsp;i++) { 2886 CHKERRQ(ISDestroy(&bzerodiag[i])); 2887 } 2888 CHKERRQ(PetscFree(bzerodiag)); 2889 } 2890 pcbddc->benign_n = benign_n; 2891 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2892 2893 /* determines if the problem has subdomains with 0 pressure block */ 2894 have_null = (PetscBool)(!!pcbddc->benign_n); 2895 CHKERRMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2896 2897 project_b0: 2898 CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2899 /* change of basis and p0 dofs */ 2900 if (pcbddc->benign_n) { 2901 PetscInt i,s,*nnz; 2902 2903 /* local change of basis for pressures */ 2904 CHKERRQ(MatDestroy(&pcbddc->benign_change)); 2905 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2906 CHKERRQ(MatSetType(pcbddc->benign_change,MATAIJ)); 2907 CHKERRQ(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2908 CHKERRQ(PetscMalloc1(n,&nnz)); 2909 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2910 for (i=0;i<pcbddc->benign_n;i++) { 2911 const PetscInt *idxs; 2912 PetscInt nzs,j; 2913 2914 CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2915 CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2916 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2917 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2918 CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2919 } 2920 CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2921 CHKERRQ(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2922 CHKERRQ(PetscFree(nnz)); 2923 /* set identity by default */ 2924 for (i=0;i<n;i++) { 2925 CHKERRQ(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2926 } 2927 CHKERRQ(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2928 CHKERRQ(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2929 /* set change on pressures */ 2930 for (s=0;s<pcbddc->benign_n;s++) { 2931 PetscScalar *array; 2932 const PetscInt *idxs; 2933 PetscInt nzs; 2934 2935 CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2936 CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2937 for (i=0;i<nzs-1;i++) { 2938 PetscScalar vals[2]; 2939 PetscInt cols[2]; 2940 2941 cols[0] = idxs[i]; 2942 cols[1] = idxs[nzs-1]; 2943 vals[0] = 1.; 2944 vals[1] = 1.; 2945 CHKERRQ(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2946 } 2947 CHKERRQ(PetscMalloc1(nzs,&array)); 2948 for (i=0;i<nzs-1;i++) array[i] = -1.; 2949 array[nzs-1] = 1.; 2950 CHKERRQ(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2951 /* store local idxs for p0 */ 2952 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2953 CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2954 CHKERRQ(PetscFree(array)); 2955 } 2956 CHKERRQ(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2957 CHKERRQ(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2958 2959 /* project if needed */ 2960 if (pcbddc->benign_change_explicit) { 2961 Mat M; 2962 2963 CHKERRQ(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2964 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 2965 CHKERRQ(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2966 CHKERRQ(MatDestroy(&M)); 2967 } 2968 /* store global idxs for p0 */ 2969 CHKERRQ(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2970 } 2971 *zerodiaglocal = zerodiag; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2976 { 2977 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2978 PetscScalar *array; 2979 2980 PetscFunctionBegin; 2981 if (!pcbddc->benign_sf) { 2982 CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2983 CHKERRQ(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2984 } 2985 if (get) { 2986 CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array)); 2987 CHKERRQ(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2988 CHKERRQ(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2989 CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 2990 } else { 2991 CHKERRQ(VecGetArray(v,&array)); 2992 CHKERRQ(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2993 CHKERRQ(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2994 CHKERRQ(VecRestoreArray(v,&array)); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3000 { 3001 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3002 3003 PetscFunctionBegin; 3004 /* TODO: add error checking 3005 - avoid nested pop (or push) calls. 3006 - cannot push before pop. 3007 - cannot call this if pcbddc->local_mat is NULL 3008 */ 3009 if (!pcbddc->benign_n) { 3010 PetscFunctionReturn(0); 3011 } 3012 if (pop) { 3013 if (pcbddc->benign_change_explicit) { 3014 IS is_p0; 3015 MatReuse reuse; 3016 3017 /* extract B_0 */ 3018 reuse = MAT_INITIAL_MATRIX; 3019 if (pcbddc->benign_B0) { 3020 reuse = MAT_REUSE_MATRIX; 3021 } 3022 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3023 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3024 /* remove rows and cols from local problem */ 3025 CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3026 CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3027 CHKERRQ(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3028 CHKERRQ(ISDestroy(&is_p0)); 3029 } else { 3030 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3031 PetscScalar *vals; 3032 PetscInt i,n,*idxs_ins; 3033 3034 CHKERRQ(VecGetLocalSize(matis->y,&n)); 3035 CHKERRQ(PetscMalloc2(n,&idxs_ins,n,&vals)); 3036 if (!pcbddc->benign_B0) { 3037 PetscInt *nnz; 3038 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3039 CHKERRQ(MatSetType(pcbddc->benign_B0,MATAIJ)); 3040 CHKERRQ(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3041 CHKERRQ(PetscMalloc1(pcbddc->benign_n,&nnz)); 3042 for (i=0;i<pcbddc->benign_n;i++) { 3043 CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3044 nnz[i] = n - nnz[i]; 3045 } 3046 CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3047 CHKERRQ(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3048 CHKERRQ(PetscFree(nnz)); 3049 } 3050 3051 for (i=0;i<pcbddc->benign_n;i++) { 3052 PetscScalar *array; 3053 PetscInt *idxs,j,nz,cum; 3054 3055 CHKERRQ(VecSet(matis->x,0.)); 3056 CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3057 CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3058 for (j=0;j<nz;j++) vals[j] = 1.; 3059 CHKERRQ(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3060 CHKERRQ(VecAssemblyBegin(matis->x)); 3061 CHKERRQ(VecAssemblyEnd(matis->x)); 3062 CHKERRQ(VecSet(matis->y,0.)); 3063 CHKERRQ(MatMult(matis->A,matis->x,matis->y)); 3064 CHKERRQ(VecGetArray(matis->y,&array)); 3065 cum = 0; 3066 for (j=0;j<n;j++) { 3067 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3068 vals[cum] = array[j]; 3069 idxs_ins[cum] = j; 3070 cum++; 3071 } 3072 } 3073 CHKERRQ(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3074 CHKERRQ(VecRestoreArray(matis->y,&array)); 3075 CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3076 } 3077 CHKERRQ(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3078 CHKERRQ(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3079 CHKERRQ(PetscFree2(idxs_ins,vals)); 3080 } 3081 } else { /* push */ 3082 if (pcbddc->benign_change_explicit) { 3083 PetscInt i; 3084 3085 for (i=0;i<pcbddc->benign_n;i++) { 3086 PetscScalar *B0_vals; 3087 PetscInt *B0_cols,B0_ncol; 3088 3089 CHKERRQ(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3090 CHKERRQ(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3091 CHKERRQ(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3092 CHKERRQ(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3093 CHKERRQ(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3094 } 3095 CHKERRQ(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3096 CHKERRQ(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3097 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3098 } 3099 PetscFunctionReturn(0); 3100 } 3101 3102 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3103 { 3104 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3105 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3106 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3107 PetscBLASInt *B_iwork,*B_ifail; 3108 PetscScalar *work,lwork; 3109 PetscScalar *St,*S,*eigv; 3110 PetscScalar *Sarray,*Starray; 3111 PetscReal *eigs,thresh,lthresh,uthresh; 3112 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3113 PetscBool allocated_S_St; 3114 #if defined(PETSC_USE_COMPLEX) 3115 PetscReal *rwork; 3116 #endif 3117 PetscErrorCode ierr; 3118 3119 PetscFunctionBegin; 3120 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3121 PetscCheck(sub_schurs->schur_explicit,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3122 PetscCheckFalse(sub_schurs->n_subs && (!sub_schurs->is_symmetric),PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3123 CHKERRQ(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3124 3125 if (pcbddc->dbg_flag) { 3126 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 3127 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3128 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3129 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3130 } 3131 3132 if (pcbddc->dbg_flag) { 3133 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3134 } 3135 3136 /* max size of subsets */ 3137 mss = 0; 3138 for (i=0;i<sub_schurs->n_subs;i++) { 3139 PetscInt subset_size; 3140 3141 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3142 mss = PetscMax(mss,subset_size); 3143 } 3144 3145 /* min/max and threshold */ 3146 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3147 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3148 nmax = PetscMax(nmin,nmax); 3149 allocated_S_St = PETSC_FALSE; 3150 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3151 allocated_S_St = PETSC_TRUE; 3152 } 3153 3154 /* allocate lapack workspace */ 3155 cum = cum2 = 0; 3156 maxneigs = 0; 3157 for (i=0;i<sub_schurs->n_subs;i++) { 3158 PetscInt n,subset_size; 3159 3160 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3161 n = PetscMin(subset_size,nmax); 3162 cum += subset_size; 3163 cum2 += subset_size*n; 3164 maxneigs = PetscMax(maxneigs,n); 3165 } 3166 lwork = 0; 3167 if (mss) { 3168 if (sub_schurs->is_symmetric) { 3169 PetscScalar sdummy = 0.; 3170 PetscBLASInt B_itype = 1; 3171 PetscBLASInt B_N = mss, idummy = 0; 3172 PetscReal rdummy = 0.,zero = 0.0; 3173 PetscReal eps = 0.0; /* dlamch? */ 3174 3175 B_lwork = -1; 3176 /* some implementations may complain about NULL pointers, even if we are querying */ 3177 S = &sdummy; 3178 St = &sdummy; 3179 eigs = &rdummy; 3180 eigv = &sdummy; 3181 B_iwork = &idummy; 3182 B_ifail = &idummy; 3183 #if defined(PETSC_USE_COMPLEX) 3184 rwork = &rdummy; 3185 #endif 3186 thresh = 1.0; 3187 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3188 #if defined(PETSC_USE_COMPLEX) 3189 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3190 #else 3191 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3192 #endif 3193 PetscCheckFalse(B_ierr != 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3194 CHKERRQ(PetscFPTrapPop()); 3195 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3196 } 3197 3198 nv = 0; 3199 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3200 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3201 } 3202 CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3203 if (allocated_S_St) { 3204 CHKERRQ(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3205 } 3206 CHKERRQ(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3207 #if defined(PETSC_USE_COMPLEX) 3208 CHKERRQ(PetscMalloc1(7*mss,&rwork)); 3209 #endif 3210 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3211 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3212 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3213 nv+cum,&pcbddc->adaptive_constraints_idxs, 3214 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3215 CHKERRQ(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3216 3217 maxneigs = 0; 3218 cum = cumarray = 0; 3219 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3220 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3221 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3222 const PetscInt *idxs; 3223 3224 CHKERRQ(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3225 for (cum=0;cum<nv;cum++) { 3226 pcbddc->adaptive_constraints_n[cum] = 1; 3227 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3228 pcbddc->adaptive_constraints_data[cum] = 1.0; 3229 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3230 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3231 } 3232 CHKERRQ(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3233 } 3234 3235 if (mss) { /* multilevel */ 3236 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3237 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3238 } 3239 3240 lthresh = pcbddc->adaptive_threshold[0]; 3241 uthresh = pcbddc->adaptive_threshold[1]; 3242 for (i=0;i<sub_schurs->n_subs;i++) { 3243 const PetscInt *idxs; 3244 PetscReal upper,lower; 3245 PetscInt j,subset_size,eigs_start = 0; 3246 PetscBLASInt B_N; 3247 PetscBool same_data = PETSC_FALSE; 3248 PetscBool scal = PETSC_FALSE; 3249 3250 if (pcbddc->use_deluxe_scaling) { 3251 upper = PETSC_MAX_REAL; 3252 lower = uthresh; 3253 } else { 3254 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3255 upper = 1./uthresh; 3256 lower = 0.; 3257 } 3258 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3259 CHKERRQ(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3260 CHKERRQ(PetscBLASIntCast(subset_size,&B_N)); 3261 /* this is experimental: we assume the dofs have been properly grouped to have 3262 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3263 if (!sub_schurs->is_posdef) { 3264 Mat T; 3265 3266 for (j=0;j<subset_size;j++) { 3267 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3268 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3269 CHKERRQ(MatScale(T,-1.0)); 3270 CHKERRQ(MatDestroy(&T)); 3271 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3272 CHKERRQ(MatScale(T,-1.0)); 3273 CHKERRQ(MatDestroy(&T)); 3274 if (sub_schurs->change_primal_sub) { 3275 PetscInt nz,k; 3276 const PetscInt *idxs; 3277 3278 CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3279 CHKERRQ(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3280 for (k=0;k<nz;k++) { 3281 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3282 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3283 } 3284 CHKERRQ(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3285 } 3286 scal = PETSC_TRUE; 3287 break; 3288 } 3289 } 3290 } 3291 3292 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3293 if (sub_schurs->is_symmetric) { 3294 PetscInt j,k; 3295 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3296 CHKERRQ(PetscArrayzero(S,subset_size*subset_size)); 3297 CHKERRQ(PetscArrayzero(St,subset_size*subset_size)); 3298 } 3299 for (j=0;j<subset_size;j++) { 3300 for (k=j;k<subset_size;k++) { 3301 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3302 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3303 } 3304 } 3305 } else { 3306 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3307 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3308 } 3309 } else { 3310 S = Sarray + cumarray; 3311 St = Starray + cumarray; 3312 } 3313 /* see if we can save some work */ 3314 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3315 CHKERRQ(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3316 } 3317 3318 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3319 B_neigs = 0; 3320 } else { 3321 if (sub_schurs->is_symmetric) { 3322 PetscBLASInt B_itype = 1; 3323 PetscBLASInt B_IL, B_IU; 3324 PetscReal eps = -1.0; /* dlamch? */ 3325 PetscInt nmin_s; 3326 PetscBool compute_range; 3327 3328 B_neigs = 0; 3329 compute_range = (PetscBool)!same_data; 3330 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3331 3332 if (pcbddc->dbg_flag) { 3333 PetscInt nc = 0; 3334 3335 if (sub_schurs->change_primal_sub) { 3336 CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3337 } 3338 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3339 } 3340 3341 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3342 if (compute_range) { 3343 3344 /* ask for eigenvalues larger than thresh */ 3345 if (sub_schurs->is_posdef) { 3346 #if defined(PETSC_USE_COMPLEX) 3347 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3348 #else 3349 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3350 #endif 3351 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3352 } else { /* no theory so far, but it works nicely */ 3353 PetscInt recipe = 0,recipe_m = 1; 3354 PetscReal bb[2]; 3355 3356 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3357 switch (recipe) { 3358 case 0: 3359 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3360 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3361 #if defined(PETSC_USE_COMPLEX) 3362 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3363 #else 3364 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3365 #endif 3366 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3367 break; 3368 case 1: 3369 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3370 #if defined(PETSC_USE_COMPLEX) 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3372 #else 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3374 #endif 3375 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3376 if (!scal) { 3377 PetscBLASInt B_neigs2 = 0; 3378 3379 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3380 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3381 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3384 #else 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3386 #endif 3387 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 2: 3392 if (scal) { 3393 bb[0] = PETSC_MIN_REAL; 3394 bb[1] = 0; 3395 #if defined(PETSC_USE_COMPLEX) 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3397 #else 3398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3399 #endif 3400 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3401 } else { 3402 PetscBLASInt B_neigs2 = 0; 3403 PetscBool import = PETSC_FALSE; 3404 3405 lthresh = PetscMax(lthresh,0.0); 3406 if (lthresh > 0.0) { 3407 bb[0] = PETSC_MIN_REAL; 3408 bb[1] = lthresh*lthresh; 3409 3410 import = PETSC_TRUE; 3411 #if defined(PETSC_USE_COMPLEX) 3412 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3413 #else 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3415 #endif 3416 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3417 } 3418 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3419 bb[1] = PETSC_MAX_REAL; 3420 if (import) { 3421 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3422 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3423 } 3424 #if defined(PETSC_USE_COMPLEX) 3425 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3426 #else 3427 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3428 #endif 3429 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3430 B_neigs += B_neigs2; 3431 } 3432 break; 3433 case 3: 3434 if (scal) { 3435 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3436 } else { 3437 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3438 } 3439 if (!scal) { 3440 bb[0] = uthresh; 3441 bb[1] = PETSC_MAX_REAL; 3442 #if defined(PETSC_USE_COMPLEX) 3443 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3444 #else 3445 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3446 #endif 3447 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3448 } 3449 if (recipe_m > 0 && B_N - B_neigs > 0) { 3450 PetscBLASInt B_neigs2 = 0; 3451 3452 B_IL = 1; 3453 CHKERRQ(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3454 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3455 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3456 #if defined(PETSC_USE_COMPLEX) 3457 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3458 #else 3459 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3460 #endif 3461 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3462 B_neigs += B_neigs2; 3463 } 3464 break; 3465 case 4: 3466 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3467 #if defined(PETSC_USE_COMPLEX) 3468 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3469 #else 3470 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3471 #endif 3472 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3473 { 3474 PetscBLASInt B_neigs2 = 0; 3475 3476 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3477 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3478 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3479 #if defined(PETSC_USE_COMPLEX) 3480 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3481 #else 3482 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3483 #endif 3484 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3485 B_neigs += B_neigs2; 3486 } 3487 break; 3488 case 5: /* same as before: first compute all eigenvalues, then filter */ 3489 #if defined(PETSC_USE_COMPLEX) 3490 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3491 #else 3492 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3493 #endif 3494 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3495 { 3496 PetscInt e,k,ne; 3497 for (e=0,ne=0;e<B_neigs;e++) { 3498 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3499 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3500 eigs[ne] = eigs[e]; 3501 ne++; 3502 } 3503 } 3504 CHKERRQ(PetscArraycpy(eigv,S,B_N*ne)); 3505 B_neigs = ne; 3506 } 3507 break; 3508 default: 3509 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3510 } 3511 } 3512 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3513 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3514 B_IL = 1; 3515 #if defined(PETSC_USE_COMPLEX) 3516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3517 #else 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3519 #endif 3520 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3521 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3522 PetscInt k; 3523 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3524 CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3525 CHKERRQ(PetscBLASIntCast(nmax,&B_neigs)); 3526 nmin = nmax; 3527 CHKERRQ(PetscArrayzero(eigv,subset_size*nmax)); 3528 for (k=0;k<nmax;k++) { 3529 eigs[k] = 1./PETSC_SMALL; 3530 eigv[k*(subset_size+1)] = 1.0; 3531 } 3532 } 3533 CHKERRQ(PetscFPTrapPop()); 3534 if (B_ierr) { 3535 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3536 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3537 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3538 } 3539 3540 if (B_neigs > nmax) { 3541 if (pcbddc->dbg_flag) { 3542 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax)); 3543 } 3544 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3545 B_neigs = nmax; 3546 } 3547 3548 nmin_s = PetscMin(nmin,B_N); 3549 if (B_neigs < nmin_s) { 3550 PetscBLASInt B_neigs2 = 0; 3551 3552 if (pcbddc->use_deluxe_scaling) { 3553 if (scal) { 3554 B_IU = nmin_s; 3555 B_IL = B_neigs + 1; 3556 } else { 3557 B_IL = B_N - nmin_s + 1; 3558 B_IU = B_N - B_neigs; 3559 } 3560 } else { 3561 B_IL = B_neigs + 1; 3562 B_IU = nmin_s; 3563 } 3564 if (pcbddc->dbg_flag) { 3565 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3566 } 3567 if (sub_schurs->is_symmetric) { 3568 PetscInt j,k; 3569 for (j=0;j<subset_size;j++) { 3570 for (k=j;k<subset_size;k++) { 3571 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3572 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3573 } 3574 } 3575 } else { 3576 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3577 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3578 } 3579 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3580 #if defined(PETSC_USE_COMPLEX) 3581 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3582 #else 3583 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3584 #endif 3585 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3586 CHKERRQ(PetscFPTrapPop()); 3587 B_neigs += B_neigs2; 3588 } 3589 if (B_ierr) { 3590 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3591 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3592 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3593 } 3594 if (pcbddc->dbg_flag) { 3595 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs)); 3596 for (j=0;j<B_neigs;j++) { 3597 if (eigs[j] == 0.0) { 3598 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3599 } else { 3600 if (pcbddc->use_deluxe_scaling) { 3601 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start])); 3602 } else { 3603 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start])); 3604 } 3605 } 3606 } 3607 } 3608 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3609 } 3610 /* change the basis back to the original one */ 3611 if (sub_schurs->change) { 3612 Mat change,phi,phit; 3613 3614 if (pcbddc->dbg_flag > 2) { 3615 PetscInt ii; 3616 for (ii=0;ii<B_neigs;ii++) { 3617 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N)); 3618 for (j=0;j<B_N;j++) { 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3621 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3622 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3623 #else 3624 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j])); 3625 #endif 3626 } 3627 } 3628 } 3629 CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3630 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3631 CHKERRQ(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3632 CHKERRQ(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3633 CHKERRQ(MatDestroy(&phit)); 3634 CHKERRQ(MatDestroy(&phi)); 3635 } 3636 maxneigs = PetscMax(B_neigs,maxneigs); 3637 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3638 if (B_neigs) { 3639 CHKERRQ(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3640 3641 if (pcbddc->dbg_flag > 1) { 3642 PetscInt ii; 3643 for (ii=0;ii<B_neigs;ii++) { 3644 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N)); 3645 for (j=0;j<B_N;j++) { 3646 #if defined(PETSC_USE_COMPLEX) 3647 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3648 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3649 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3650 #else 3651 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]])); 3652 #endif 3653 } 3654 } 3655 } 3656 CHKERRQ(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3657 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3658 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3659 cum++; 3660 } 3661 CHKERRQ(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3662 /* shift for next computation */ 3663 cumarray += subset_size*subset_size; 3664 } 3665 if (pcbddc->dbg_flag) { 3666 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 3667 } 3668 3669 if (mss) { 3670 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3671 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3672 /* destroy matrices (junk) */ 3673 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3674 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3675 } 3676 if (allocated_S_St) { 3677 CHKERRQ(PetscFree2(S,St)); 3678 } 3679 CHKERRQ(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3680 #if defined(PETSC_USE_COMPLEX) 3681 CHKERRQ(PetscFree(rwork)); 3682 #endif 3683 if (pcbddc->dbg_flag) { 3684 PetscInt maxneigs_r; 3685 CHKERRMPI(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3686 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r)); 3687 } 3688 CHKERRQ(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3689 PetscFunctionReturn(0); 3690 } 3691 3692 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3693 { 3694 PetscScalar *coarse_submat_vals; 3695 3696 PetscFunctionBegin; 3697 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3698 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3699 CHKERRQ(PCBDDCSetUpLocalScatters(pc)); 3700 3701 /* Setup local neumann solver ksp_R */ 3702 /* PCBDDCSetUpLocalScatters should be called first! */ 3703 CHKERRQ(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3704 3705 /* 3706 Setup local correction and local part of coarse basis. 3707 Gives back the dense local part of the coarse matrix in column major ordering 3708 */ 3709 CHKERRQ(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3710 3711 /* Compute total number of coarse nodes and setup coarse solver */ 3712 CHKERRQ(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3713 3714 /* free */ 3715 CHKERRQ(PetscFree(coarse_submat_vals)); 3716 PetscFunctionReturn(0); 3717 } 3718 3719 PetscErrorCode PCBDDCResetCustomization(PC pc) 3720 { 3721 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3722 3723 PetscFunctionBegin; 3724 CHKERRQ(ISDestroy(&pcbddc->user_primal_vertices)); 3725 CHKERRQ(ISDestroy(&pcbddc->user_primal_vertices_local)); 3726 CHKERRQ(ISDestroy(&pcbddc->NeumannBoundaries)); 3727 CHKERRQ(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3728 CHKERRQ(ISDestroy(&pcbddc->DirichletBoundaries)); 3729 CHKERRQ(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3730 CHKERRQ(PetscFree(pcbddc->onearnullvecs_state)); 3731 CHKERRQ(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3732 CHKERRQ(PCBDDCSetDofsSplitting(pc,0,NULL)); 3733 CHKERRQ(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3734 PetscFunctionReturn(0); 3735 } 3736 3737 PetscErrorCode PCBDDCResetTopography(PC pc) 3738 { 3739 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3740 PetscInt i; 3741 3742 PetscFunctionBegin; 3743 CHKERRQ(MatDestroy(&pcbddc->nedcG)); 3744 CHKERRQ(ISDestroy(&pcbddc->nedclocal)); 3745 CHKERRQ(MatDestroy(&pcbddc->discretegradient)); 3746 CHKERRQ(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3747 CHKERRQ(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3748 CHKERRQ(MatDestroy(&pcbddc->switch_static_change)); 3749 CHKERRQ(VecDestroy(&pcbddc->work_change)); 3750 CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix)); 3751 CHKERRQ(MatDestroy(&pcbddc->divudotp)); 3752 CHKERRQ(ISDestroy(&pcbddc->divudotp_vl2l)); 3753 CHKERRQ(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3754 for (i=0;i<pcbddc->n_local_subs;i++) { 3755 CHKERRQ(ISDestroy(&pcbddc->local_subs[i])); 3756 } 3757 pcbddc->n_local_subs = 0; 3758 CHKERRQ(PetscFree(pcbddc->local_subs)); 3759 CHKERRQ(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3760 pcbddc->graphanalyzed = PETSC_FALSE; 3761 pcbddc->recompute_topography = PETSC_TRUE; 3762 pcbddc->corner_selected = PETSC_FALSE; 3763 PetscFunctionReturn(0); 3764 } 3765 3766 PetscErrorCode PCBDDCResetSolvers(PC pc) 3767 { 3768 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3769 3770 PetscFunctionBegin; 3771 CHKERRQ(VecDestroy(&pcbddc->coarse_vec)); 3772 if (pcbddc->coarse_phi_B) { 3773 PetscScalar *array; 3774 CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3775 CHKERRQ(PetscFree(array)); 3776 } 3777 CHKERRQ(MatDestroy(&pcbddc->coarse_phi_B)); 3778 CHKERRQ(MatDestroy(&pcbddc->coarse_phi_D)); 3779 CHKERRQ(MatDestroy(&pcbddc->coarse_psi_B)); 3780 CHKERRQ(MatDestroy(&pcbddc->coarse_psi_D)); 3781 CHKERRQ(VecDestroy(&pcbddc->vec1_P)); 3782 CHKERRQ(VecDestroy(&pcbddc->vec1_C)); 3783 CHKERRQ(MatDestroy(&pcbddc->local_auxmat2)); 3784 CHKERRQ(MatDestroy(&pcbddc->local_auxmat1)); 3785 CHKERRQ(VecDestroy(&pcbddc->vec1_R)); 3786 CHKERRQ(VecDestroy(&pcbddc->vec2_R)); 3787 CHKERRQ(ISDestroy(&pcbddc->is_R_local)); 3788 CHKERRQ(VecScatterDestroy(&pcbddc->R_to_B)); 3789 CHKERRQ(VecScatterDestroy(&pcbddc->R_to_D)); 3790 CHKERRQ(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3791 CHKERRQ(KSPReset(pcbddc->ksp_D)); 3792 CHKERRQ(KSPReset(pcbddc->ksp_R)); 3793 CHKERRQ(KSPReset(pcbddc->coarse_ksp)); 3794 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 3795 CHKERRQ(PetscFree(pcbddc->primal_indices_local_idxs)); 3796 CHKERRQ(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3797 CHKERRQ(PetscFree(pcbddc->global_primal_indices)); 3798 CHKERRQ(ISDestroy(&pcbddc->coarse_subassembling)); 3799 CHKERRQ(MatDestroy(&pcbddc->benign_change)); 3800 CHKERRQ(VecDestroy(&pcbddc->benign_vec)); 3801 CHKERRQ(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3802 CHKERRQ(MatDestroy(&pcbddc->benign_B0)); 3803 CHKERRQ(PetscSFDestroy(&pcbddc->benign_sf)); 3804 if (pcbddc->benign_zerodiag_subs) { 3805 PetscInt i; 3806 for (i=0;i<pcbddc->benign_n;i++) { 3807 CHKERRQ(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3808 } 3809 CHKERRQ(PetscFree(pcbddc->benign_zerodiag_subs)); 3810 } 3811 CHKERRQ(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3812 PetscFunctionReturn(0); 3813 } 3814 3815 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3816 { 3817 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3818 PC_IS *pcis = (PC_IS*)pc->data; 3819 VecType impVecType; 3820 PetscInt n_constraints,n_R,old_size; 3821 3822 PetscFunctionBegin; 3823 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3824 n_R = pcis->n - pcbddc->n_vertices; 3825 CHKERRQ(VecGetType(pcis->vec1_N,&impVecType)); 3826 /* local work vectors (try to avoid unneeded work)*/ 3827 /* R nodes */ 3828 old_size = -1; 3829 if (pcbddc->vec1_R) { 3830 CHKERRQ(VecGetSize(pcbddc->vec1_R,&old_size)); 3831 } 3832 if (n_R != old_size) { 3833 CHKERRQ(VecDestroy(&pcbddc->vec1_R)); 3834 CHKERRQ(VecDestroy(&pcbddc->vec2_R)); 3835 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3836 CHKERRQ(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3837 CHKERRQ(VecSetType(pcbddc->vec1_R,impVecType)); 3838 CHKERRQ(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3839 } 3840 /* local primal dofs */ 3841 old_size = -1; 3842 if (pcbddc->vec1_P) { 3843 CHKERRQ(VecGetSize(pcbddc->vec1_P,&old_size)); 3844 } 3845 if (pcbddc->local_primal_size != old_size) { 3846 CHKERRQ(VecDestroy(&pcbddc->vec1_P)); 3847 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3848 CHKERRQ(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3849 CHKERRQ(VecSetType(pcbddc->vec1_P,impVecType)); 3850 } 3851 /* local explicit constraints */ 3852 old_size = -1; 3853 if (pcbddc->vec1_C) { 3854 CHKERRQ(VecGetSize(pcbddc->vec1_C,&old_size)); 3855 } 3856 if (n_constraints && n_constraints != old_size) { 3857 CHKERRQ(VecDestroy(&pcbddc->vec1_C)); 3858 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3859 CHKERRQ(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3860 CHKERRQ(VecSetType(pcbddc->vec1_C,impVecType)); 3861 } 3862 PetscFunctionReturn(0); 3863 } 3864 3865 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3866 { 3867 /* pointers to pcis and pcbddc */ 3868 PC_IS* pcis = (PC_IS*)pc->data; 3869 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3870 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3871 /* submatrices of local problem */ 3872 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3873 /* submatrices of local coarse problem */ 3874 Mat S_VV,S_CV,S_VC,S_CC; 3875 /* working matrices */ 3876 Mat C_CR; 3877 /* additional working stuff */ 3878 PC pc_R; 3879 Mat F,Brhs = NULL; 3880 Vec dummy_vec; 3881 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3882 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3883 PetscScalar *work; 3884 PetscInt *idx_V_B; 3885 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3886 PetscInt i,n_R,n_D,n_B; 3887 PetscScalar one=1.0,m_one=-1.0; 3888 3889 PetscFunctionBegin; 3890 PetscCheckFalse(!pcbddc->symmetric_primal && pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3891 CHKERRQ(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3892 3893 /* Set Non-overlapping dimensions */ 3894 n_vertices = pcbddc->n_vertices; 3895 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3896 n_B = pcis->n_B; 3897 n_D = pcis->n - n_B; 3898 n_R = pcis->n - n_vertices; 3899 3900 /* vertices in boundary numbering */ 3901 CHKERRQ(PetscMalloc1(n_vertices,&idx_V_B)); 3902 CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3903 PetscCheckFalse(i != n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3904 3905 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3906 CHKERRQ(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3907 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3908 CHKERRQ(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3909 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3910 CHKERRQ(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3911 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3912 CHKERRQ(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3913 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3914 CHKERRQ(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3915 3916 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3917 CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3918 CHKERRQ(PCSetUp(pc_R)); 3919 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3920 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3921 lda_rhs = n_R; 3922 need_benign_correction = PETSC_FALSE; 3923 if (isLU || isCHOL) { 3924 CHKERRQ(PCFactorGetMatrix(pc_R,&F)); 3925 } else if (sub_schurs && sub_schurs->reuse_solver) { 3926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3927 MatFactorType type; 3928 3929 F = reuse_solver->F; 3930 CHKERRQ(MatGetFactorType(F,&type)); 3931 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3932 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3933 CHKERRQ(MatGetSize(F,&lda_rhs,NULL)); 3934 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3935 } else F = NULL; 3936 3937 /* determine if we can use a sparse right-hand side */ 3938 sparserhs = PETSC_FALSE; 3939 if (F) { 3940 MatSolverType solver; 3941 3942 CHKERRQ(MatFactorGetSolverType(F,&solver)); 3943 CHKERRQ(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3944 } 3945 3946 /* allocate workspace */ 3947 n = 0; 3948 if (n_constraints) { 3949 n += lda_rhs*n_constraints; 3950 } 3951 if (n_vertices) { 3952 n = PetscMax(2*lda_rhs*n_vertices,n); 3953 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3954 } 3955 if (!pcbddc->symmetric_primal) { 3956 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3957 } 3958 CHKERRQ(PetscMalloc1(n,&work)); 3959 3960 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3961 dummy_vec = NULL; 3962 if (need_benign_correction && lda_rhs != n_R && F) { 3963 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3964 CHKERRQ(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3965 CHKERRQ(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3966 } 3967 3968 CHKERRQ(MatDestroy(&pcbddc->local_auxmat1)); 3969 CHKERRQ(MatDestroy(&pcbddc->local_auxmat2)); 3970 3971 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3972 if (n_constraints) { 3973 Mat M3,C_B; 3974 IS is_aux; 3975 3976 /* Extract constraints on R nodes: C_{CR} */ 3977 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3978 CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 3979 CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 3980 3981 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3982 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3983 if (!sparserhs) { 3984 CHKERRQ(PetscArrayzero(work,lda_rhs*n_constraints)); 3985 for (i=0;i<n_constraints;i++) { 3986 const PetscScalar *row_cmat_values; 3987 const PetscInt *row_cmat_indices; 3988 PetscInt size_of_constraint,j; 3989 3990 CHKERRQ(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 3991 for (j=0;j<size_of_constraint;j++) { 3992 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3993 } 3994 CHKERRQ(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 3995 } 3996 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 3997 } else { 3998 Mat tC_CR; 3999 4000 CHKERRQ(MatScale(C_CR,-1.0)); 4001 if (lda_rhs != n_R) { 4002 PetscScalar *aa; 4003 PetscInt r,*ii,*jj; 4004 PetscBool done; 4005 4006 CHKERRQ(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4007 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4008 CHKERRQ(MatSeqAIJGetArray(C_CR,&aa)); 4009 CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4010 CHKERRQ(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4011 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4012 } else { 4013 CHKERRQ(PetscObjectReference((PetscObject)C_CR)); 4014 tC_CR = C_CR; 4015 } 4016 CHKERRQ(MatCreateTranspose(tC_CR,&Brhs)); 4017 CHKERRQ(MatDestroy(&tC_CR)); 4018 } 4019 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4020 if (F) { 4021 if (need_benign_correction) { 4022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4023 4024 /* rhs is already zero on interior dofs, no need to change the rhs */ 4025 CHKERRQ(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4026 } 4027 CHKERRQ(MatMatSolve(F,Brhs,local_auxmat2_R)); 4028 if (need_benign_correction) { 4029 PetscScalar *marr; 4030 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4031 4032 CHKERRQ(MatDenseGetArray(local_auxmat2_R,&marr)); 4033 if (lda_rhs != n_R) { 4034 for (i=0;i<n_constraints;i++) { 4035 CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4036 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4037 CHKERRQ(VecResetArray(dummy_vec)); 4038 } 4039 } else { 4040 for (i=0;i<n_constraints;i++) { 4041 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4042 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4043 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4044 } 4045 } 4046 CHKERRQ(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4047 } 4048 } else { 4049 PetscScalar *marr; 4050 4051 CHKERRQ(MatDenseGetArray(local_auxmat2_R,&marr)); 4052 for (i=0;i<n_constraints;i++) { 4053 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4054 CHKERRQ(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4055 CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4056 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4057 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4058 CHKERRQ(VecResetArray(pcbddc->vec2_R)); 4059 } 4060 CHKERRQ(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4061 } 4062 if (sparserhs) { 4063 CHKERRQ(MatScale(C_CR,-1.0)); 4064 } 4065 CHKERRQ(MatDestroy(&Brhs)); 4066 if (!pcbddc->switch_static) { 4067 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4068 for (i=0;i<n_constraints;i++) { 4069 Vec r, b; 4070 CHKERRQ(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4071 CHKERRQ(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4072 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4073 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4074 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4075 CHKERRQ(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4076 } 4077 CHKERRQ(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4078 } else { 4079 if (lda_rhs != n_R) { 4080 IS dummy; 4081 4082 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4083 CHKERRQ(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4084 CHKERRQ(ISDestroy(&dummy)); 4085 } else { 4086 CHKERRQ(PetscObjectReference((PetscObject)local_auxmat2_R)); 4087 pcbddc->local_auxmat2 = local_auxmat2_R; 4088 } 4089 CHKERRQ(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4090 } 4091 CHKERRQ(ISDestroy(&is_aux)); 4092 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4093 CHKERRQ(MatScale(M3,m_one)); 4094 if (isCHOL) { 4095 CHKERRQ(MatCholeskyFactor(M3,NULL,NULL)); 4096 } else { 4097 CHKERRQ(MatLUFactor(M3,NULL,NULL,NULL)); 4098 } 4099 CHKERRQ(MatSeqDenseInvertFactors_Private(M3)); 4100 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4101 CHKERRQ(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4102 CHKERRQ(MatDestroy(&C_B)); 4103 CHKERRQ(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4104 CHKERRQ(MatDestroy(&M3)); 4105 } 4106 4107 /* Get submatrices from subdomain matrix */ 4108 if (n_vertices) { 4109 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4110 PetscBool oldpin; 4111 #endif 4112 PetscBool isaij; 4113 IS is_aux; 4114 4115 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4116 IS tis; 4117 4118 CHKERRQ(ISDuplicate(pcbddc->is_R_local,&tis)); 4119 CHKERRQ(ISSort(tis)); 4120 CHKERRQ(ISComplement(tis,0,pcis->n,&is_aux)); 4121 CHKERRQ(ISDestroy(&tis)); 4122 } else { 4123 CHKERRQ(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4124 } 4125 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4126 oldpin = pcbddc->local_mat->boundtocpu; 4127 #endif 4128 CHKERRQ(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4129 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4130 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4131 CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4132 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4133 CHKERRQ(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4134 } 4135 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4136 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4137 CHKERRQ(MatBindToCPU(pcbddc->local_mat,oldpin)); 4138 #endif 4139 CHKERRQ(ISDestroy(&is_aux)); 4140 } 4141 4142 /* Matrix of coarse basis functions (local) */ 4143 if (pcbddc->coarse_phi_B) { 4144 PetscInt on_B,on_primal,on_D=n_D; 4145 if (pcbddc->coarse_phi_D) { 4146 CHKERRQ(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4147 } 4148 CHKERRQ(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4149 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4150 PetscScalar *marray; 4151 4152 CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4153 CHKERRQ(PetscFree(marray)); 4154 CHKERRQ(MatDestroy(&pcbddc->coarse_phi_B)); 4155 CHKERRQ(MatDestroy(&pcbddc->coarse_psi_B)); 4156 CHKERRQ(MatDestroy(&pcbddc->coarse_phi_D)); 4157 CHKERRQ(MatDestroy(&pcbddc->coarse_psi_D)); 4158 } 4159 } 4160 4161 if (!pcbddc->coarse_phi_B) { 4162 PetscScalar *marr; 4163 4164 /* memory size */ 4165 n = n_B*pcbddc->local_primal_size; 4166 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4167 if (!pcbddc->symmetric_primal) n *= 2; 4168 CHKERRQ(PetscCalloc1(n,&marr)); 4169 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4170 marr += n_B*pcbddc->local_primal_size; 4171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4172 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4173 marr += n_D*pcbddc->local_primal_size; 4174 } 4175 if (!pcbddc->symmetric_primal) { 4176 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4177 marr += n_B*pcbddc->local_primal_size; 4178 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4179 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4180 } 4181 } else { 4182 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4183 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4184 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4185 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4186 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4187 } 4188 } 4189 } 4190 4191 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4192 p0_lidx_I = NULL; 4193 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4194 const PetscInt *idxs; 4195 4196 CHKERRQ(ISGetIndices(pcis->is_I_local,&idxs)); 4197 CHKERRQ(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4198 for (i=0;i<pcbddc->benign_n;i++) { 4199 CHKERRQ(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4200 } 4201 CHKERRQ(ISRestoreIndices(pcis->is_I_local,&idxs)); 4202 } 4203 4204 /* vertices */ 4205 if (n_vertices) { 4206 PetscBool restoreavr = PETSC_FALSE; 4207 4208 CHKERRQ(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4209 4210 if (n_R) { 4211 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4212 PetscBLASInt B_N,B_one = 1; 4213 const PetscScalar *x; 4214 PetscScalar *y; 4215 4216 CHKERRQ(MatScale(A_RV,m_one)); 4217 if (need_benign_correction) { 4218 ISLocalToGlobalMapping RtoN; 4219 IS is_p0; 4220 PetscInt *idxs_p0,n; 4221 4222 CHKERRQ(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4223 CHKERRQ(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4224 CHKERRQ(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4225 PetscCheckFalse(n != pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4226 CHKERRQ(ISLocalToGlobalMappingDestroy(&RtoN)); 4227 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4228 CHKERRQ(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4229 CHKERRQ(ISDestroy(&is_p0)); 4230 } 4231 4232 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4233 if (!sparserhs || need_benign_correction) { 4234 if (lda_rhs == n_R) { 4235 CHKERRQ(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4236 } else { 4237 PetscScalar *av,*array; 4238 const PetscInt *xadj,*adjncy; 4239 PetscInt n; 4240 PetscBool flg_row; 4241 4242 array = work+lda_rhs*n_vertices; 4243 CHKERRQ(PetscArrayzero(array,lda_rhs*n_vertices)); 4244 CHKERRQ(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4245 CHKERRQ(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4246 CHKERRQ(MatSeqAIJGetArray(A_RV,&av)); 4247 for (i=0;i<n;i++) { 4248 PetscInt j; 4249 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4250 } 4251 CHKERRQ(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4252 CHKERRQ(MatDestroy(&A_RV)); 4253 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4254 } 4255 if (need_benign_correction) { 4256 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4257 PetscScalar *marr; 4258 4259 CHKERRQ(MatDenseGetArray(A_RV,&marr)); 4260 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4261 4262 | 0 0 0 | (V) 4263 L = | 0 0 -1 | (P-p0) 4264 | 0 0 -1 | (p0) 4265 4266 */ 4267 for (i=0;i<reuse_solver->benign_n;i++) { 4268 const PetscScalar *vals; 4269 const PetscInt *idxs,*idxs_zero; 4270 PetscInt n,j,nz; 4271 4272 CHKERRQ(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4273 CHKERRQ(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4274 CHKERRQ(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4275 for (j=0;j<n;j++) { 4276 PetscScalar val = vals[j]; 4277 PetscInt k,col = idxs[j]; 4278 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4279 } 4280 CHKERRQ(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4281 CHKERRQ(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4282 } 4283 CHKERRQ(MatDenseRestoreArray(A_RV,&marr)); 4284 } 4285 CHKERRQ(PetscObjectReference((PetscObject)A_RV)); 4286 Brhs = A_RV; 4287 } else { 4288 Mat tA_RVT,A_RVT; 4289 4290 if (!pcbddc->symmetric_primal) { 4291 /* A_RV already scaled by -1 */ 4292 CHKERRQ(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4293 } else { 4294 restoreavr = PETSC_TRUE; 4295 CHKERRQ(MatScale(A_VR,-1.0)); 4296 CHKERRQ(PetscObjectReference((PetscObject)A_VR)); 4297 A_RVT = A_VR; 4298 } 4299 if (lda_rhs != n_R) { 4300 PetscScalar *aa; 4301 PetscInt r,*ii,*jj; 4302 PetscBool done; 4303 4304 CHKERRQ(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4305 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4306 CHKERRQ(MatSeqAIJGetArray(A_RVT,&aa)); 4307 CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4308 CHKERRQ(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4309 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4310 } else { 4311 CHKERRQ(PetscObjectReference((PetscObject)A_RVT)); 4312 tA_RVT = A_RVT; 4313 } 4314 CHKERRQ(MatCreateTranspose(tA_RVT,&Brhs)); 4315 CHKERRQ(MatDestroy(&tA_RVT)); 4316 CHKERRQ(MatDestroy(&A_RVT)); 4317 } 4318 if (F) { 4319 /* need to correct the rhs */ 4320 if (need_benign_correction) { 4321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4322 PetscScalar *marr; 4323 4324 CHKERRQ(MatDenseGetArray(Brhs,&marr)); 4325 if (lda_rhs != n_R) { 4326 for (i=0;i<n_vertices;i++) { 4327 CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4328 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4329 CHKERRQ(VecResetArray(dummy_vec)); 4330 } 4331 } else { 4332 for (i=0;i<n_vertices;i++) { 4333 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4334 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4335 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4336 } 4337 } 4338 CHKERRQ(MatDenseRestoreArray(Brhs,&marr)); 4339 } 4340 CHKERRQ(MatMatSolve(F,Brhs,A_RRmA_RV)); 4341 if (restoreavr) { 4342 CHKERRQ(MatScale(A_VR,-1.0)); 4343 } 4344 /* need to correct the solution */ 4345 if (need_benign_correction) { 4346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4347 PetscScalar *marr; 4348 4349 CHKERRQ(MatDenseGetArray(A_RRmA_RV,&marr)); 4350 if (lda_rhs != n_R) { 4351 for (i=0;i<n_vertices;i++) { 4352 CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4353 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4354 CHKERRQ(VecResetArray(dummy_vec)); 4355 } 4356 } else { 4357 for (i=0;i<n_vertices;i++) { 4358 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4359 CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4360 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4361 } 4362 } 4363 CHKERRQ(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4364 } 4365 } else { 4366 CHKERRQ(MatDenseGetArray(Brhs,&y)); 4367 for (i=0;i<n_vertices;i++) { 4368 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4369 CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4370 CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4371 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4372 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4373 CHKERRQ(VecResetArray(pcbddc->vec2_R)); 4374 } 4375 CHKERRQ(MatDenseRestoreArray(Brhs,&y)); 4376 } 4377 CHKERRQ(MatDestroy(&A_RV)); 4378 CHKERRQ(MatDestroy(&Brhs)); 4379 /* S_VV and S_CV */ 4380 if (n_constraints) { 4381 Mat B; 4382 4383 CHKERRQ(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4384 for (i=0;i<n_vertices;i++) { 4385 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4386 CHKERRQ(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4387 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4388 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4389 CHKERRQ(VecResetArray(pcis->vec1_B)); 4390 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4391 } 4392 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4393 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4394 CHKERRQ(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4395 CHKERRQ(MatProductSetType(S_CV,MATPRODUCT_AB)); 4396 CHKERRQ(MatProductSetFromOptions(S_CV)); 4397 CHKERRQ(MatProductSymbolic(S_CV)); 4398 CHKERRQ(MatProductNumeric(S_CV)); 4399 CHKERRQ(MatProductClear(S_CV)); 4400 4401 CHKERRQ(MatDestroy(&B)); 4402 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4403 /* Reuse B = local_auxmat2_R * S_CV */ 4404 CHKERRQ(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4405 CHKERRQ(MatProductSetType(B,MATPRODUCT_AB)); 4406 CHKERRQ(MatProductSetFromOptions(B)); 4407 CHKERRQ(MatProductSymbolic(B)); 4408 CHKERRQ(MatProductNumeric(B)); 4409 4410 CHKERRQ(MatScale(S_CV,m_one)); 4411 CHKERRQ(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4412 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4413 CHKERRQ(MatDestroy(&B)); 4414 } 4415 if (lda_rhs != n_R) { 4416 CHKERRQ(MatDestroy(&A_RRmA_RV)); 4417 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4418 CHKERRQ(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4419 } 4420 CHKERRQ(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4421 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4422 if (need_benign_correction) { 4423 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4424 PetscScalar *marr,*sums; 4425 4426 CHKERRQ(PetscMalloc1(n_vertices,&sums)); 4427 CHKERRQ(MatDenseGetArray(S_VVt,&marr)); 4428 for (i=0;i<reuse_solver->benign_n;i++) { 4429 const PetscScalar *vals; 4430 const PetscInt *idxs,*idxs_zero; 4431 PetscInt n,j,nz; 4432 4433 CHKERRQ(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4434 CHKERRQ(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4435 for (j=0;j<n_vertices;j++) { 4436 PetscInt k; 4437 sums[j] = 0.; 4438 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4439 } 4440 CHKERRQ(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4441 for (j=0;j<n;j++) { 4442 PetscScalar val = vals[j]; 4443 PetscInt k; 4444 for (k=0;k<n_vertices;k++) { 4445 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4446 } 4447 } 4448 CHKERRQ(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4449 CHKERRQ(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4450 } 4451 CHKERRQ(PetscFree(sums)); 4452 CHKERRQ(MatDenseRestoreArray(S_VVt,&marr)); 4453 CHKERRQ(MatDestroy(&A_RV_bcorr)); 4454 } 4455 CHKERRQ(MatDestroy(&A_RRmA_RV)); 4456 CHKERRQ(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4457 CHKERRQ(MatDenseGetArrayRead(A_VV,&x)); 4458 CHKERRQ(MatDenseGetArray(S_VVt,&y)); 4459 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4460 CHKERRQ(MatDenseRestoreArrayRead(A_VV,&x)); 4461 CHKERRQ(MatDenseRestoreArray(S_VVt,&y)); 4462 CHKERRQ(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4463 CHKERRQ(MatDestroy(&S_VVt)); 4464 } else { 4465 CHKERRQ(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4466 } 4467 CHKERRQ(MatDestroy(&A_VV)); 4468 4469 /* coarse basis functions */ 4470 for (i=0;i<n_vertices;i++) { 4471 Vec v; 4472 PetscScalar one = 1.0,zero = 0.0; 4473 4474 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4475 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4476 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4477 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4478 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4479 PetscMPIInt rank; 4480 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4481 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4482 } 4483 CHKERRQ(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4484 CHKERRQ(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4485 CHKERRQ(VecAssemblyEnd(v)); 4486 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4487 4488 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4489 PetscInt j; 4490 4491 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4492 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4493 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4494 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4495 PetscMPIInt rank; 4496 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4497 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4498 } 4499 for (j=0;j<pcbddc->benign_n;j++) CHKERRQ(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4500 CHKERRQ(VecAssemblyBegin(v)); 4501 CHKERRQ(VecAssemblyEnd(v)); 4502 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4503 } 4504 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4505 } 4506 /* if n_R == 0 the object is not destroyed */ 4507 CHKERRQ(MatDestroy(&A_RV)); 4508 } 4509 CHKERRQ(VecDestroy(&dummy_vec)); 4510 4511 if (n_constraints) { 4512 Mat B; 4513 4514 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4515 CHKERRQ(MatScale(S_CC,m_one)); 4516 CHKERRQ(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4517 CHKERRQ(MatProductSetType(B,MATPRODUCT_AB)); 4518 CHKERRQ(MatProductSetFromOptions(B)); 4519 CHKERRQ(MatProductSymbolic(B)); 4520 CHKERRQ(MatProductNumeric(B)); 4521 4522 CHKERRQ(MatScale(S_CC,m_one)); 4523 if (n_vertices) { 4524 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4525 CHKERRQ(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4526 } else { 4527 Mat S_VCt; 4528 4529 if (lda_rhs != n_R) { 4530 CHKERRQ(MatDestroy(&B)); 4531 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4532 CHKERRQ(MatDenseSetLDA(B,lda_rhs)); 4533 } 4534 CHKERRQ(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4535 CHKERRQ(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4536 CHKERRQ(MatDestroy(&S_VCt)); 4537 } 4538 } 4539 CHKERRQ(MatDestroy(&B)); 4540 /* coarse basis functions */ 4541 for (i=0;i<n_constraints;i++) { 4542 Vec v; 4543 4544 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4545 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4546 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4547 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4548 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4549 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4550 PetscInt j; 4551 PetscScalar zero = 0.0; 4552 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4553 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4554 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4555 for (j=0;j<pcbddc->benign_n;j++) CHKERRQ(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4556 CHKERRQ(VecAssemblyBegin(v)); 4557 CHKERRQ(VecAssemblyEnd(v)); 4558 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4559 } 4560 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4561 } 4562 } 4563 if (n_constraints) { 4564 CHKERRQ(MatDestroy(&local_auxmat2_R)); 4565 } 4566 CHKERRQ(PetscFree(p0_lidx_I)); 4567 4568 /* coarse matrix entries relative to B_0 */ 4569 if (pcbddc->benign_n) { 4570 Mat B0_B,B0_BPHI; 4571 IS is_dummy; 4572 const PetscScalar *data; 4573 PetscInt j; 4574 4575 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4576 CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4577 CHKERRQ(ISDestroy(&is_dummy)); 4578 CHKERRQ(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4579 CHKERRQ(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4580 CHKERRQ(MatDenseGetArrayRead(B0_BPHI,&data)); 4581 for (j=0;j<pcbddc->benign_n;j++) { 4582 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4583 for (i=0;i<pcbddc->local_primal_size;i++) { 4584 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4585 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4586 } 4587 } 4588 CHKERRQ(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4589 CHKERRQ(MatDestroy(&B0_B)); 4590 CHKERRQ(MatDestroy(&B0_BPHI)); 4591 } 4592 4593 /* compute other basis functions for non-symmetric problems */ 4594 if (!pcbddc->symmetric_primal) { 4595 Mat B_V=NULL,B_C=NULL; 4596 PetscScalar *marray; 4597 4598 if (n_constraints) { 4599 Mat S_CCT,C_CRT; 4600 4601 CHKERRQ(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4602 CHKERRQ(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4603 CHKERRQ(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4604 CHKERRQ(MatDestroy(&S_CCT)); 4605 if (n_vertices) { 4606 Mat S_VCT; 4607 4608 CHKERRQ(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4609 CHKERRQ(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4610 CHKERRQ(MatDestroy(&S_VCT)); 4611 } 4612 CHKERRQ(MatDestroy(&C_CRT)); 4613 } else { 4614 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4615 } 4616 if (n_vertices && n_R) { 4617 PetscScalar *av,*marray; 4618 const PetscInt *xadj,*adjncy; 4619 PetscInt n; 4620 PetscBool flg_row; 4621 4622 /* B_V = B_V - A_VR^T */ 4623 CHKERRQ(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4624 CHKERRQ(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4625 CHKERRQ(MatSeqAIJGetArray(A_VR,&av)); 4626 CHKERRQ(MatDenseGetArray(B_V,&marray)); 4627 for (i=0;i<n;i++) { 4628 PetscInt j; 4629 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4630 } 4631 CHKERRQ(MatDenseRestoreArray(B_V,&marray)); 4632 CHKERRQ(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4633 CHKERRQ(MatDestroy(&A_VR)); 4634 } 4635 4636 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4637 if (n_vertices) { 4638 CHKERRQ(MatDenseGetArray(B_V,&marray)); 4639 for (i=0;i<n_vertices;i++) { 4640 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4641 CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4642 CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4643 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4644 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4645 CHKERRQ(VecResetArray(pcbddc->vec2_R)); 4646 } 4647 CHKERRQ(MatDenseRestoreArray(B_V,&marray)); 4648 } 4649 if (B_C) { 4650 CHKERRQ(MatDenseGetArray(B_C,&marray)); 4651 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4652 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4653 CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4654 CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4655 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4656 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4657 CHKERRQ(VecResetArray(pcbddc->vec2_R)); 4658 } 4659 CHKERRQ(MatDenseRestoreArray(B_C,&marray)); 4660 } 4661 /* coarse basis functions */ 4662 for (i=0;i<pcbddc->local_primal_size;i++) { 4663 Vec v; 4664 4665 CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4666 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4667 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4668 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4669 if (i<n_vertices) { 4670 PetscScalar one = 1.0; 4671 CHKERRQ(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4672 CHKERRQ(VecAssemblyBegin(v)); 4673 CHKERRQ(VecAssemblyEnd(v)); 4674 } 4675 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4676 4677 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4678 CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4679 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4680 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4681 CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4682 } 4683 CHKERRQ(VecResetArray(pcbddc->vec1_R)); 4684 } 4685 CHKERRQ(MatDestroy(&B_V)); 4686 CHKERRQ(MatDestroy(&B_C)); 4687 } 4688 4689 /* free memory */ 4690 CHKERRQ(PetscFree(idx_V_B)); 4691 CHKERRQ(MatDestroy(&S_VV)); 4692 CHKERRQ(MatDestroy(&S_CV)); 4693 CHKERRQ(MatDestroy(&S_VC)); 4694 CHKERRQ(MatDestroy(&S_CC)); 4695 CHKERRQ(PetscFree(work)); 4696 if (n_vertices) { 4697 CHKERRQ(MatDestroy(&A_VR)); 4698 } 4699 if (n_constraints) { 4700 CHKERRQ(MatDestroy(&C_CR)); 4701 } 4702 CHKERRQ(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4703 4704 /* Checking coarse_sub_mat and coarse basis functios */ 4705 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4706 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4707 if (pcbddc->dbg_flag) { 4708 Mat coarse_sub_mat; 4709 Mat AUXMAT,TM1,TM2,TM3,TM4; 4710 Mat coarse_phi_D,coarse_phi_B; 4711 Mat coarse_psi_D,coarse_psi_B; 4712 Mat A_II,A_BB,A_IB,A_BI; 4713 Mat C_B,CPHI; 4714 IS is_dummy; 4715 Vec mones; 4716 MatType checkmattype=MATSEQAIJ; 4717 PetscReal real_value; 4718 4719 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4720 Mat A; 4721 CHKERRQ(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4722 CHKERRQ(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4723 CHKERRQ(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4724 CHKERRQ(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4725 CHKERRQ(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4726 CHKERRQ(MatDestroy(&A)); 4727 } else { 4728 CHKERRQ(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4729 CHKERRQ(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4730 CHKERRQ(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4731 CHKERRQ(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4732 } 4733 CHKERRQ(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4734 CHKERRQ(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4735 if (!pcbddc->symmetric_primal) { 4736 CHKERRQ(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4737 CHKERRQ(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4738 } 4739 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4740 4741 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4742 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4743 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 4744 if (!pcbddc->symmetric_primal) { 4745 CHKERRQ(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4746 CHKERRQ(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4747 CHKERRQ(MatDestroy(&AUXMAT)); 4748 CHKERRQ(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4749 CHKERRQ(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4750 CHKERRQ(MatDestroy(&AUXMAT)); 4751 CHKERRQ(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4752 CHKERRQ(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4753 CHKERRQ(MatDestroy(&AUXMAT)); 4754 CHKERRQ(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4755 CHKERRQ(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4756 CHKERRQ(MatDestroy(&AUXMAT)); 4757 } else { 4758 CHKERRQ(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4759 CHKERRQ(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4760 CHKERRQ(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4761 CHKERRQ(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4762 CHKERRQ(MatDestroy(&AUXMAT)); 4763 CHKERRQ(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4764 CHKERRQ(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4765 CHKERRQ(MatDestroy(&AUXMAT)); 4766 } 4767 CHKERRQ(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4768 CHKERRQ(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4769 CHKERRQ(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4770 CHKERRQ(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4771 if (pcbddc->benign_n) { 4772 Mat B0_B,B0_BPHI; 4773 const PetscScalar *data2; 4774 PetscScalar *data; 4775 PetscInt j; 4776 4777 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4778 CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4779 CHKERRQ(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4780 CHKERRQ(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4781 CHKERRQ(MatDenseGetArray(TM1,&data)); 4782 CHKERRQ(MatDenseGetArrayRead(B0_BPHI,&data2)); 4783 for (j=0;j<pcbddc->benign_n;j++) { 4784 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4785 for (i=0;i<pcbddc->local_primal_size;i++) { 4786 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4787 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4788 } 4789 } 4790 CHKERRQ(MatDenseRestoreArray(TM1,&data)); 4791 CHKERRQ(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4792 CHKERRQ(MatDestroy(&B0_B)); 4793 CHKERRQ(ISDestroy(&is_dummy)); 4794 CHKERRQ(MatDestroy(&B0_BPHI)); 4795 } 4796 #if 0 4797 { 4798 PetscViewer viewer; 4799 char filename[256]; 4800 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4801 CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4802 CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4803 CHKERRQ(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4804 CHKERRQ(MatView(coarse_sub_mat,viewer)); 4805 CHKERRQ(PetscObjectSetName((PetscObject)TM1,"projected")); 4806 CHKERRQ(MatView(TM1,viewer)); 4807 if (pcbddc->coarse_phi_B) { 4808 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4809 CHKERRQ(MatView(pcbddc->coarse_phi_B,viewer)); 4810 } 4811 if (pcbddc->coarse_phi_D) { 4812 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4813 CHKERRQ(MatView(pcbddc->coarse_phi_D,viewer)); 4814 } 4815 if (pcbddc->coarse_psi_B) { 4816 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4817 CHKERRQ(MatView(pcbddc->coarse_psi_B,viewer)); 4818 } 4819 if (pcbddc->coarse_psi_D) { 4820 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4821 CHKERRQ(MatView(pcbddc->coarse_psi_D,viewer)); 4822 } 4823 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4824 CHKERRQ(MatView(pcbddc->local_mat,viewer)); 4825 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4826 CHKERRQ(MatView(pcbddc->ConstraintMatrix,viewer)); 4827 CHKERRQ(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4828 CHKERRQ(ISView(pcis->is_I_local,viewer)); 4829 CHKERRQ(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4830 CHKERRQ(ISView(pcis->is_B_local,viewer)); 4831 CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4832 CHKERRQ(ISView(pcbddc->is_R_local,viewer)); 4833 CHKERRQ(PetscViewerDestroy(&viewer)); 4834 } 4835 #endif 4836 CHKERRQ(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4837 CHKERRQ(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4838 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4839 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value)); 4840 4841 /* check constraints */ 4842 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4843 CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4844 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4845 CHKERRQ(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4846 } else { 4847 PetscScalar *data; 4848 Mat tmat; 4849 CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4850 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4851 CHKERRQ(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4852 CHKERRQ(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4853 CHKERRQ(MatDestroy(&tmat)); 4854 } 4855 CHKERRQ(MatCreateVecs(CPHI,&mones,NULL)); 4856 CHKERRQ(VecSet(mones,-1.0)); 4857 CHKERRQ(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4858 CHKERRQ(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4859 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4860 if (!pcbddc->symmetric_primal) { 4861 CHKERRQ(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4862 CHKERRQ(VecSet(mones,-1.0)); 4863 CHKERRQ(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4864 CHKERRQ(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4865 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4866 } 4867 CHKERRQ(MatDestroy(&C_B)); 4868 CHKERRQ(MatDestroy(&CPHI)); 4869 CHKERRQ(ISDestroy(&is_dummy)); 4870 CHKERRQ(VecDestroy(&mones)); 4871 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 4872 CHKERRQ(MatDestroy(&A_II)); 4873 CHKERRQ(MatDestroy(&A_BB)); 4874 CHKERRQ(MatDestroy(&A_IB)); 4875 CHKERRQ(MatDestroy(&A_BI)); 4876 CHKERRQ(MatDestroy(&TM1)); 4877 CHKERRQ(MatDestroy(&TM2)); 4878 CHKERRQ(MatDestroy(&TM3)); 4879 CHKERRQ(MatDestroy(&TM4)); 4880 CHKERRQ(MatDestroy(&coarse_phi_D)); 4881 CHKERRQ(MatDestroy(&coarse_phi_B)); 4882 if (!pcbddc->symmetric_primal) { 4883 CHKERRQ(MatDestroy(&coarse_psi_D)); 4884 CHKERRQ(MatDestroy(&coarse_psi_B)); 4885 } 4886 CHKERRQ(MatDestroy(&coarse_sub_mat)); 4887 } 4888 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4889 { 4890 PetscBool gpu; 4891 4892 CHKERRQ(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4893 if (gpu) { 4894 if (pcbddc->local_auxmat1) { 4895 CHKERRQ(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4896 } 4897 if (pcbddc->local_auxmat2) { 4898 CHKERRQ(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4899 } 4900 if (pcbddc->coarse_phi_B) { 4901 CHKERRQ(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4902 } 4903 if (pcbddc->coarse_phi_D) { 4904 CHKERRQ(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4905 } 4906 if (pcbddc->coarse_psi_B) { 4907 CHKERRQ(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4908 } 4909 if (pcbddc->coarse_psi_D) { 4910 CHKERRQ(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4911 } 4912 } 4913 } 4914 /* get back data */ 4915 *coarse_submat_vals_n = coarse_submat_vals; 4916 PetscFunctionReturn(0); 4917 } 4918 4919 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4920 { 4921 Mat *work_mat; 4922 IS isrow_s,iscol_s; 4923 PetscBool rsorted,csorted; 4924 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4925 4926 PetscFunctionBegin; 4927 CHKERRQ(ISSorted(isrow,&rsorted)); 4928 CHKERRQ(ISSorted(iscol,&csorted)); 4929 CHKERRQ(ISGetLocalSize(isrow,&rsize)); 4930 CHKERRQ(ISGetLocalSize(iscol,&csize)); 4931 4932 if (!rsorted) { 4933 const PetscInt *idxs; 4934 PetscInt *idxs_sorted,i; 4935 4936 CHKERRQ(PetscMalloc1(rsize,&idxs_perm_r)); 4937 CHKERRQ(PetscMalloc1(rsize,&idxs_sorted)); 4938 for (i=0;i<rsize;i++) { 4939 idxs_perm_r[i] = i; 4940 } 4941 CHKERRQ(ISGetIndices(isrow,&idxs)); 4942 CHKERRQ(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4943 for (i=0;i<rsize;i++) { 4944 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4945 } 4946 CHKERRQ(ISRestoreIndices(isrow,&idxs)); 4947 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4948 } else { 4949 CHKERRQ(PetscObjectReference((PetscObject)isrow)); 4950 isrow_s = isrow; 4951 } 4952 4953 if (!csorted) { 4954 if (isrow == iscol) { 4955 CHKERRQ(PetscObjectReference((PetscObject)isrow_s)); 4956 iscol_s = isrow_s; 4957 } else { 4958 const PetscInt *idxs; 4959 PetscInt *idxs_sorted,i; 4960 4961 CHKERRQ(PetscMalloc1(csize,&idxs_perm_c)); 4962 CHKERRQ(PetscMalloc1(csize,&idxs_sorted)); 4963 for (i=0;i<csize;i++) { 4964 idxs_perm_c[i] = i; 4965 } 4966 CHKERRQ(ISGetIndices(iscol,&idxs)); 4967 CHKERRQ(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4968 for (i=0;i<csize;i++) { 4969 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4970 } 4971 CHKERRQ(ISRestoreIndices(iscol,&idxs)); 4972 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4973 } 4974 } else { 4975 CHKERRQ(PetscObjectReference((PetscObject)iscol)); 4976 iscol_s = iscol; 4977 } 4978 4979 CHKERRQ(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4980 4981 if (!rsorted || !csorted) { 4982 Mat new_mat; 4983 IS is_perm_r,is_perm_c; 4984 4985 if (!rsorted) { 4986 PetscInt *idxs_r,i; 4987 CHKERRQ(PetscMalloc1(rsize,&idxs_r)); 4988 for (i=0;i<rsize;i++) { 4989 idxs_r[idxs_perm_r[i]] = i; 4990 } 4991 CHKERRQ(PetscFree(idxs_perm_r)); 4992 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 4993 } else { 4994 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 4995 } 4996 CHKERRQ(ISSetPermutation(is_perm_r)); 4997 4998 if (!csorted) { 4999 if (isrow_s == iscol_s) { 5000 CHKERRQ(PetscObjectReference((PetscObject)is_perm_r)); 5001 is_perm_c = is_perm_r; 5002 } else { 5003 PetscInt *idxs_c,i; 5004 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5005 CHKERRQ(PetscMalloc1(csize,&idxs_c)); 5006 for (i=0;i<csize;i++) { 5007 idxs_c[idxs_perm_c[i]] = i; 5008 } 5009 CHKERRQ(PetscFree(idxs_perm_c)); 5010 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5011 } 5012 } else { 5013 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5014 } 5015 CHKERRQ(ISSetPermutation(is_perm_c)); 5016 5017 CHKERRQ(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5018 CHKERRQ(MatDestroy(&work_mat[0])); 5019 work_mat[0] = new_mat; 5020 CHKERRQ(ISDestroy(&is_perm_r)); 5021 CHKERRQ(ISDestroy(&is_perm_c)); 5022 } 5023 5024 CHKERRQ(PetscObjectReference((PetscObject)work_mat[0])); 5025 *B = work_mat[0]; 5026 CHKERRQ(MatDestroyMatrices(1,&work_mat)); 5027 CHKERRQ(ISDestroy(&isrow_s)); 5028 CHKERRQ(ISDestroy(&iscol_s)); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5033 { 5034 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5035 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5036 Mat new_mat,lA; 5037 IS is_local,is_global; 5038 PetscInt local_size; 5039 PetscBool isseqaij; 5040 5041 PetscFunctionBegin; 5042 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 5043 CHKERRQ(MatGetSize(matis->A,&local_size,NULL)); 5044 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5045 CHKERRQ(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5046 CHKERRQ(ISDestroy(&is_local)); 5047 CHKERRQ(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5048 CHKERRQ(ISDestroy(&is_global)); 5049 5050 if (pcbddc->dbg_flag) { 5051 Vec x,x_change; 5052 PetscReal error; 5053 5054 CHKERRQ(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5055 CHKERRQ(VecSetRandom(x,NULL)); 5056 CHKERRQ(MatMult(ChangeOfBasisMatrix,x,x_change)); 5057 CHKERRQ(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5058 CHKERRQ(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5059 CHKERRQ(MatMult(new_mat,matis->x,matis->y)); 5060 if (!pcbddc->change_interior) { 5061 const PetscScalar *x,*y,*v; 5062 PetscReal lerror = 0.; 5063 PetscInt i; 5064 5065 CHKERRQ(VecGetArrayRead(matis->x,&x)); 5066 CHKERRQ(VecGetArrayRead(matis->y,&y)); 5067 CHKERRQ(VecGetArrayRead(matis->counter,&v)); 5068 for (i=0;i<local_size;i++) 5069 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5070 lerror = PetscAbsScalar(x[i]-y[i]); 5071 CHKERRQ(VecRestoreArrayRead(matis->x,&x)); 5072 CHKERRQ(VecRestoreArrayRead(matis->y,&y)); 5073 CHKERRQ(VecRestoreArrayRead(matis->counter,&v)); 5074 CHKERRMPI(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5075 if (error > PETSC_SMALL) { 5076 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5077 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5078 } else { 5079 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5080 } 5081 } 5082 } 5083 CHKERRQ(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5084 CHKERRQ(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5085 CHKERRQ(VecAXPY(x,-1.0,x_change)); 5086 CHKERRQ(VecNorm(x,NORM_INFINITY,&error)); 5087 if (error > PETSC_SMALL) { 5088 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5089 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5090 } else { 5091 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5092 } 5093 } 5094 CHKERRQ(VecDestroy(&x)); 5095 CHKERRQ(VecDestroy(&x_change)); 5096 } 5097 5098 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5099 CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5100 5101 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5102 CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5103 if (isseqaij) { 5104 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 5105 CHKERRQ(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5106 if (lA) { 5107 Mat work; 5108 CHKERRQ(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5109 CHKERRQ(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5110 CHKERRQ(MatDestroy(&work)); 5111 } 5112 } else { 5113 Mat work_mat; 5114 5115 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 5116 CHKERRQ(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5117 CHKERRQ(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5118 CHKERRQ(MatDestroy(&work_mat)); 5119 if (lA) { 5120 Mat work; 5121 CHKERRQ(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5122 CHKERRQ(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5123 CHKERRQ(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5124 CHKERRQ(MatDestroy(&work)); 5125 } 5126 } 5127 if (matis->A->symmetric_set) { 5128 CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5129 #if !defined(PETSC_USE_COMPLEX) 5130 CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5131 #endif 5132 } 5133 CHKERRQ(MatDestroy(&new_mat)); 5134 PetscFunctionReturn(0); 5135 } 5136 5137 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5138 { 5139 PC_IS* pcis = (PC_IS*)(pc->data); 5140 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5141 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5142 PetscInt *idx_R_local=NULL; 5143 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5144 PetscInt vbs,bs; 5145 PetscBT bitmask=NULL; 5146 5147 PetscFunctionBegin; 5148 /* 5149 No need to setup local scatters if 5150 - primal space is unchanged 5151 AND 5152 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5153 AND 5154 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5155 */ 5156 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5157 PetscFunctionReturn(0); 5158 } 5159 /* destroy old objects */ 5160 CHKERRQ(ISDestroy(&pcbddc->is_R_local)); 5161 CHKERRQ(VecScatterDestroy(&pcbddc->R_to_B)); 5162 CHKERRQ(VecScatterDestroy(&pcbddc->R_to_D)); 5163 /* Set Non-overlapping dimensions */ 5164 n_B = pcis->n_B; 5165 n_D = pcis->n - n_B; 5166 n_vertices = pcbddc->n_vertices; 5167 5168 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5169 5170 /* create auxiliary bitmask and allocate workspace */ 5171 if (!sub_schurs || !sub_schurs->reuse_solver) { 5172 CHKERRQ(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5173 CHKERRQ(PetscBTCreate(pcis->n,&bitmask)); 5174 for (i=0;i<n_vertices;i++) { 5175 CHKERRQ(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5176 } 5177 5178 for (i=0, n_R=0; i<pcis->n; i++) { 5179 if (!PetscBTLookup(bitmask,i)) { 5180 idx_R_local[n_R++] = i; 5181 } 5182 } 5183 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5184 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5185 5186 CHKERRQ(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5187 CHKERRQ(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5188 } 5189 5190 /* Block code */ 5191 vbs = 1; 5192 CHKERRQ(MatGetBlockSize(pcbddc->local_mat,&bs)); 5193 if (bs>1 && !(n_vertices%bs)) { 5194 PetscBool is_blocked = PETSC_TRUE; 5195 PetscInt *vary; 5196 if (!sub_schurs || !sub_schurs->reuse_solver) { 5197 CHKERRQ(PetscMalloc1(pcis->n/bs,&vary)); 5198 CHKERRQ(PetscArrayzero(vary,pcis->n/bs)); 5199 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5200 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5201 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5202 for (i=0; i<pcis->n/bs; i++) { 5203 if (vary[i]!=0 && vary[i]!=bs) { 5204 is_blocked = PETSC_FALSE; 5205 break; 5206 } 5207 } 5208 CHKERRQ(PetscFree(vary)); 5209 } else { 5210 /* Verify directly the R set */ 5211 for (i=0; i<n_R/bs; i++) { 5212 PetscInt j,node=idx_R_local[bs*i]; 5213 for (j=1; j<bs; j++) { 5214 if (node != idx_R_local[bs*i+j]-j) { 5215 is_blocked = PETSC_FALSE; 5216 break; 5217 } 5218 } 5219 } 5220 } 5221 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5222 vbs = bs; 5223 for (i=0;i<n_R/vbs;i++) { 5224 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5225 } 5226 } 5227 } 5228 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5229 if (sub_schurs && sub_schurs->reuse_solver) { 5230 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5231 5232 CHKERRQ(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5233 CHKERRQ(ISDestroy(&reuse_solver->is_R)); 5234 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5235 reuse_solver->is_R = pcbddc->is_R_local; 5236 } else { 5237 CHKERRQ(PetscFree(idx_R_local)); 5238 } 5239 5240 /* print some info if requested */ 5241 if (pcbddc->dbg_flag) { 5242 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5243 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 5244 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5245 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5246 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B)); 5247 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5248 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 5249 } 5250 5251 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5252 if (!sub_schurs || !sub_schurs->reuse_solver) { 5253 IS is_aux1,is_aux2; 5254 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5255 5256 CHKERRQ(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5257 CHKERRQ(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5258 CHKERRQ(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5259 CHKERRQ(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5260 for (i=0; i<n_D; i++) { 5261 CHKERRQ(PetscBTSet(bitmask,is_indices[i])); 5262 } 5263 CHKERRQ(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5264 for (i=0, j=0; i<n_R; i++) { 5265 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5266 aux_array1[j++] = i; 5267 } 5268 } 5269 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5270 CHKERRQ(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5271 for (i=0, j=0; i<n_B; i++) { 5272 if (!PetscBTLookup(bitmask,is_indices[i])) { 5273 aux_array2[j++] = i; 5274 } 5275 } 5276 CHKERRQ(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5277 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5278 CHKERRQ(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5279 CHKERRQ(ISDestroy(&is_aux1)); 5280 CHKERRQ(ISDestroy(&is_aux2)); 5281 5282 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5283 CHKERRQ(PetscMalloc1(n_D,&aux_array1)); 5284 for (i=0, j=0; i<n_R; i++) { 5285 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5286 aux_array1[j++] = i; 5287 } 5288 } 5289 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5290 CHKERRQ(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5291 CHKERRQ(ISDestroy(&is_aux1)); 5292 } 5293 CHKERRQ(PetscBTDestroy(&bitmask)); 5294 CHKERRQ(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5295 } else { 5296 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5297 IS tis; 5298 PetscInt schur_size; 5299 5300 CHKERRQ(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5301 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5302 CHKERRQ(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5303 CHKERRQ(ISDestroy(&tis)); 5304 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5305 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5306 CHKERRQ(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5307 CHKERRQ(ISDestroy(&tis)); 5308 } 5309 } 5310 PetscFunctionReturn(0); 5311 } 5312 5313 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5314 { 5315 MatNullSpace NullSpace; 5316 Mat dmat; 5317 const Vec *nullvecs; 5318 Vec v,v2,*nullvecs2; 5319 VecScatter sct = NULL; 5320 PetscContainer c; 5321 PetscScalar *ddata; 5322 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5323 PetscBool nnsp_has_cnst; 5324 5325 PetscFunctionBegin; 5326 if (!is && !B) { /* MATIS */ 5327 Mat_IS* matis = (Mat_IS*)A->data; 5328 5329 if (!B) { 5330 CHKERRQ(MatISGetLocalMat(A,&B)); 5331 } 5332 sct = matis->cctx; 5333 CHKERRQ(PetscObjectReference((PetscObject)sct)); 5334 } else { 5335 CHKERRQ(MatGetNullSpace(B,&NullSpace)); 5336 if (!NullSpace) { 5337 CHKERRQ(MatGetNearNullSpace(B,&NullSpace)); 5338 } 5339 if (NullSpace) PetscFunctionReturn(0); 5340 } 5341 CHKERRQ(MatGetNullSpace(A,&NullSpace)); 5342 if (!NullSpace) { 5343 CHKERRQ(MatGetNearNullSpace(A,&NullSpace)); 5344 } 5345 if (!NullSpace) PetscFunctionReturn(0); 5346 5347 CHKERRQ(MatCreateVecs(A,&v,NULL)); 5348 CHKERRQ(MatCreateVecs(B,&v2,NULL)); 5349 if (!sct) { 5350 CHKERRQ(VecScatterCreate(v,is,v2,NULL,&sct)); 5351 } 5352 CHKERRQ(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5353 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5354 CHKERRQ(PetscMalloc1(bsiz,&nullvecs2)); 5355 CHKERRQ(VecGetBlockSize(v2,&bs)); 5356 CHKERRQ(VecGetSize(v2,&N)); 5357 CHKERRQ(VecGetLocalSize(v2,&n)); 5358 CHKERRQ(PetscMalloc1(n*bsiz,&ddata)); 5359 for (k=0;k<nnsp_size;k++) { 5360 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5361 CHKERRQ(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5362 CHKERRQ(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5363 } 5364 if (nnsp_has_cnst) { 5365 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5366 CHKERRQ(VecSet(nullvecs2[nnsp_size],1.0)); 5367 } 5368 CHKERRQ(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5369 CHKERRQ(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5370 5371 CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5372 CHKERRQ(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5373 CHKERRQ(PetscContainerSetPointer(c,ddata)); 5374 CHKERRQ(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5375 CHKERRQ(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5376 CHKERRQ(PetscContainerDestroy(&c)); 5377 CHKERRQ(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5378 CHKERRQ(MatDestroy(&dmat)); 5379 5380 for (k=0;k<bsiz;k++) { 5381 CHKERRQ(VecDestroy(&nullvecs2[k])); 5382 } 5383 CHKERRQ(PetscFree(nullvecs2)); 5384 CHKERRQ(MatSetNearNullSpace(B,NullSpace)); 5385 CHKERRQ(MatNullSpaceDestroy(&NullSpace)); 5386 CHKERRQ(VecDestroy(&v)); 5387 CHKERRQ(VecDestroy(&v2)); 5388 CHKERRQ(VecScatterDestroy(&sct)); 5389 PetscFunctionReturn(0); 5390 } 5391 5392 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5393 { 5394 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5395 PC_IS *pcis = (PC_IS*)pc->data; 5396 PC pc_temp; 5397 Mat A_RR; 5398 MatNullSpace nnsp; 5399 MatReuse reuse; 5400 PetscScalar m_one = -1.0; 5401 PetscReal value; 5402 PetscInt n_D,n_R; 5403 PetscBool issbaij,opts; 5404 void (*f)(void) = NULL; 5405 char dir_prefix[256],neu_prefix[256],str_level[16]; 5406 size_t len; 5407 5408 PetscFunctionBegin; 5409 CHKERRQ(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5410 /* approximate solver, propagate NearNullSpace if needed */ 5411 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5412 MatNullSpace gnnsp1,gnnsp2; 5413 PetscBool lhas,ghas; 5414 5415 CHKERRQ(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5416 CHKERRQ(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5417 CHKERRQ(MatGetNullSpace(pc->pmat,&gnnsp2)); 5418 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5419 CHKERRMPI(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5420 if (!ghas && (gnnsp1 || gnnsp2)) { 5421 CHKERRQ(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5422 } 5423 } 5424 5425 /* compute prefixes */ 5426 CHKERRQ(PetscStrcpy(dir_prefix,"")); 5427 CHKERRQ(PetscStrcpy(neu_prefix,"")); 5428 if (!pcbddc->current_level) { 5429 CHKERRQ(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5430 CHKERRQ(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5431 CHKERRQ(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5432 CHKERRQ(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5433 } else { 5434 CHKERRQ(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5435 CHKERRQ(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5436 len -= 15; /* remove "pc_bddc_coarse_" */ 5437 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5438 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5439 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5440 CHKERRQ(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5441 CHKERRQ(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5442 CHKERRQ(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5443 CHKERRQ(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5444 CHKERRQ(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5445 CHKERRQ(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5446 } 5447 5448 /* DIRICHLET PROBLEM */ 5449 if (dirichlet) { 5450 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5451 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5452 PetscCheckFalse(!sub_schurs || !sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5453 if (pcbddc->dbg_flag) { 5454 Mat A_IIn; 5455 5456 CHKERRQ(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5457 CHKERRQ(MatDestroy(&pcis->A_II)); 5458 pcis->A_II = A_IIn; 5459 } 5460 } 5461 if (pcbddc->local_mat->symmetric_set) { 5462 CHKERRQ(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5463 } 5464 /* Matrix for Dirichlet problem is pcis->A_II */ 5465 n_D = pcis->n - pcis->n_B; 5466 opts = PETSC_FALSE; 5467 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5468 opts = PETSC_TRUE; 5469 CHKERRQ(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5470 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5471 /* default */ 5472 CHKERRQ(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5473 CHKERRQ(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5474 CHKERRQ(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5475 CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5476 if (issbaij) { 5477 CHKERRQ(PCSetType(pc_temp,PCCHOLESKY)); 5478 } else { 5479 CHKERRQ(PCSetType(pc_temp,PCLU)); 5480 } 5481 CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5482 } 5483 CHKERRQ(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5484 CHKERRQ(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5485 /* Allow user's customization */ 5486 if (opts) { 5487 CHKERRQ(KSPSetFromOptions(pcbddc->ksp_D)); 5488 } 5489 CHKERRQ(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5490 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5491 CHKERRQ(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5492 } 5493 CHKERRQ(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5494 CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5495 CHKERRQ(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5496 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5497 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5498 const PetscInt *idxs; 5499 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5500 5501 CHKERRQ(ISGetLocalSize(pcis->is_I_local,&nl)); 5502 CHKERRQ(ISGetIndices(pcis->is_I_local,&idxs)); 5503 CHKERRQ(PetscMalloc1(nl*cdim,&scoords)); 5504 for (i=0;i<nl;i++) { 5505 for (d=0;d<cdim;d++) { 5506 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5507 } 5508 } 5509 CHKERRQ(ISRestoreIndices(pcis->is_I_local,&idxs)); 5510 CHKERRQ(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5511 CHKERRQ(PetscFree(scoords)); 5512 } 5513 if (sub_schurs && sub_schurs->reuse_solver) { 5514 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5515 5516 CHKERRQ(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5517 } 5518 5519 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5520 if (!n_D) { 5521 CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5522 CHKERRQ(PCSetType(pc_temp,PCNONE)); 5523 } 5524 CHKERRQ(KSPSetUp(pcbddc->ksp_D)); 5525 /* set ksp_D into pcis data */ 5526 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5527 CHKERRQ(KSPDestroy(&pcis->ksp_D)); 5528 pcis->ksp_D = pcbddc->ksp_D; 5529 } 5530 5531 /* NEUMANN PROBLEM */ 5532 A_RR = NULL; 5533 if (neumann) { 5534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5535 PetscInt ibs,mbs; 5536 PetscBool issbaij, reuse_neumann_solver; 5537 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5538 5539 reuse_neumann_solver = PETSC_FALSE; 5540 if (sub_schurs && sub_schurs->reuse_solver) { 5541 IS iP; 5542 5543 reuse_neumann_solver = PETSC_TRUE; 5544 CHKERRQ(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5545 if (iP) reuse_neumann_solver = PETSC_FALSE; 5546 } 5547 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5548 CHKERRQ(ISGetSize(pcbddc->is_R_local,&n_R)); 5549 if (pcbddc->ksp_R) { /* already created ksp */ 5550 PetscInt nn_R; 5551 CHKERRQ(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5552 CHKERRQ(PetscObjectReference((PetscObject)A_RR)); 5553 CHKERRQ(MatGetSize(A_RR,&nn_R,NULL)); 5554 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5555 CHKERRQ(KSPReset(pcbddc->ksp_R)); 5556 CHKERRQ(MatDestroy(&A_RR)); 5557 reuse = MAT_INITIAL_MATRIX; 5558 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5559 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5560 CHKERRQ(MatDestroy(&A_RR)); 5561 reuse = MAT_INITIAL_MATRIX; 5562 } else { /* safe to reuse the matrix */ 5563 reuse = MAT_REUSE_MATRIX; 5564 } 5565 } 5566 /* last check */ 5567 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5568 CHKERRQ(MatDestroy(&A_RR)); 5569 reuse = MAT_INITIAL_MATRIX; 5570 } 5571 } else { /* first time, so we need to create the matrix */ 5572 reuse = MAT_INITIAL_MATRIX; 5573 } 5574 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5575 TODO: Get Rid of these conversions */ 5576 CHKERRQ(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5577 CHKERRQ(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5578 CHKERRQ(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5579 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5580 if (matis->A == pcbddc->local_mat) { 5581 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 5582 CHKERRQ(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5583 } else { 5584 CHKERRQ(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5585 } 5586 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5587 if (matis->A == pcbddc->local_mat) { 5588 CHKERRQ(MatDestroy(&pcbddc->local_mat)); 5589 CHKERRQ(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5590 } else { 5591 CHKERRQ(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5592 } 5593 } 5594 /* extract A_RR */ 5595 if (reuse_neumann_solver) { 5596 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5597 5598 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5599 CHKERRQ(MatDestroy(&A_RR)); 5600 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5601 CHKERRQ(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5602 } else { 5603 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5604 } 5605 } else { 5606 CHKERRQ(MatDestroy(&A_RR)); 5607 CHKERRQ(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5608 CHKERRQ(PetscObjectReference((PetscObject)A_RR)); 5609 } 5610 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5611 CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5612 } 5613 if (pcbddc->local_mat->symmetric_set) { 5614 CHKERRQ(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5615 } 5616 opts = PETSC_FALSE; 5617 if (!pcbddc->ksp_R) { /* create object if not present */ 5618 opts = PETSC_TRUE; 5619 CHKERRQ(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5620 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5621 /* default */ 5622 CHKERRQ(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5623 CHKERRQ(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5624 CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5625 CHKERRQ(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5626 if (issbaij) { 5627 CHKERRQ(PCSetType(pc_temp,PCCHOLESKY)); 5628 } else { 5629 CHKERRQ(PCSetType(pc_temp,PCLU)); 5630 } 5631 CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5632 } 5633 CHKERRQ(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5634 CHKERRQ(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5635 if (opts) { /* Allow user's customization once */ 5636 CHKERRQ(KSPSetFromOptions(pcbddc->ksp_R)); 5637 } 5638 CHKERRQ(MatGetNearNullSpace(A_RR,&nnsp)); 5639 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5640 CHKERRQ(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5641 } 5642 CHKERRQ(MatGetNearNullSpace(A_RR,&nnsp)); 5643 CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5644 CHKERRQ(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5645 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5646 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5647 const PetscInt *idxs; 5648 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5649 5650 CHKERRQ(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5651 CHKERRQ(ISGetIndices(pcbddc->is_R_local,&idxs)); 5652 CHKERRQ(PetscMalloc1(nl*cdim,&scoords)); 5653 for (i=0;i<nl;i++) { 5654 for (d=0;d<cdim;d++) { 5655 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5656 } 5657 } 5658 CHKERRQ(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5659 CHKERRQ(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5660 CHKERRQ(PetscFree(scoords)); 5661 } 5662 5663 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5664 if (!n_R) { 5665 CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5666 CHKERRQ(PCSetType(pc_temp,PCNONE)); 5667 } 5668 /* Reuse solver if it is present */ 5669 if (reuse_neumann_solver) { 5670 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5671 5672 CHKERRQ(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5673 } 5674 CHKERRQ(KSPSetUp(pcbddc->ksp_R)); 5675 } 5676 5677 if (pcbddc->dbg_flag) { 5678 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 5679 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5680 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5681 } 5682 CHKERRQ(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5683 5684 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5685 if (pcbddc->NullSpace_corr[0]) { 5686 CHKERRQ(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5687 } 5688 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5689 CHKERRQ(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5690 } 5691 if (neumann && pcbddc->NullSpace_corr[2]) { 5692 CHKERRQ(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5693 } 5694 /* check Dirichlet and Neumann solvers */ 5695 if (pcbddc->dbg_flag) { 5696 if (dirichlet) { /* Dirichlet */ 5697 CHKERRQ(VecSetRandom(pcis->vec1_D,NULL)); 5698 CHKERRQ(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5699 CHKERRQ(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5700 CHKERRQ(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5701 CHKERRQ(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5702 CHKERRQ(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5703 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value)); 5704 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 5705 } 5706 if (neumann) { /* Neumann */ 5707 CHKERRQ(VecSetRandom(pcbddc->vec1_R,NULL)); 5708 CHKERRQ(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5709 CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5710 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5711 CHKERRQ(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5712 CHKERRQ(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5713 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value)); 5714 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 5715 } 5716 } 5717 /* free Neumann problem's matrix */ 5718 CHKERRQ(MatDestroy(&A_RR)); 5719 PetscFunctionReturn(0); 5720 } 5721 5722 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5723 { 5724 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5726 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5727 5728 PetscFunctionBegin; 5729 if (!reuse_solver) { 5730 CHKERRQ(VecSet(pcbddc->vec1_R,0.)); 5731 } 5732 if (!pcbddc->switch_static) { 5733 if (applytranspose && pcbddc->local_auxmat1) { 5734 CHKERRQ(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5735 CHKERRQ(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5736 } 5737 if (!reuse_solver) { 5738 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5739 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5740 } else { 5741 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5742 5743 CHKERRQ(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5744 CHKERRQ(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5745 } 5746 } else { 5747 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5748 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5749 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5750 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5751 if (applytranspose && pcbddc->local_auxmat1) { 5752 CHKERRQ(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5753 CHKERRQ(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5754 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5755 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5756 } 5757 } 5758 CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5759 if (!reuse_solver || pcbddc->switch_static) { 5760 if (applytranspose) { 5761 CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5762 } else { 5763 CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5764 } 5765 CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5766 } else { 5767 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5768 5769 if (applytranspose) { 5770 CHKERRQ(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5771 } else { 5772 CHKERRQ(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5773 } 5774 } 5775 CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5776 CHKERRQ(VecSet(inout_B,0.)); 5777 if (!pcbddc->switch_static) { 5778 if (!reuse_solver) { 5779 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5780 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5781 } else { 5782 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5783 5784 CHKERRQ(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5785 CHKERRQ(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5786 } 5787 if (!applytranspose && pcbddc->local_auxmat1) { 5788 CHKERRQ(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5789 CHKERRQ(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5790 } 5791 } else { 5792 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5793 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5794 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5795 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5796 if (!applytranspose && pcbddc->local_auxmat1) { 5797 CHKERRQ(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5798 CHKERRQ(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5799 } 5800 CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5801 CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5802 CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5803 CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5804 } 5805 PetscFunctionReturn(0); 5806 } 5807 5808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5809 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5810 { 5811 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5812 PC_IS* pcis = (PC_IS*) (pc->data); 5813 const PetscScalar zero = 0.0; 5814 5815 PetscFunctionBegin; 5816 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5817 if (!pcbddc->benign_apply_coarse_only) { 5818 if (applytranspose) { 5819 CHKERRQ(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5820 if (pcbddc->switch_static) CHKERRQ(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5821 } else { 5822 CHKERRQ(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5823 if (pcbddc->switch_static) CHKERRQ(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5824 } 5825 } else { 5826 CHKERRQ(VecSet(pcbddc->vec1_P,zero)); 5827 } 5828 5829 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5830 if (pcbddc->benign_n) { 5831 PetscScalar *array; 5832 PetscInt j; 5833 5834 CHKERRQ(VecGetArray(pcbddc->vec1_P,&array)); 5835 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5836 CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array)); 5837 } 5838 5839 /* start communications from local primal nodes to rhs of coarse solver */ 5840 CHKERRQ(VecSet(pcbddc->coarse_vec,zero)); 5841 CHKERRQ(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5842 CHKERRQ(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5843 5844 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5845 if (pcbddc->coarse_ksp) { 5846 Mat coarse_mat; 5847 Vec rhs,sol; 5848 MatNullSpace nullsp; 5849 PetscBool isbddc = PETSC_FALSE; 5850 5851 if (pcbddc->benign_have_null) { 5852 PC coarse_pc; 5853 5854 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5855 CHKERRQ(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5856 /* we need to propagate to coarser levels the need for a possible benign correction */ 5857 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5858 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5859 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5860 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5861 } 5862 } 5863 CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5864 CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5865 CHKERRQ(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5866 if (applytranspose) { 5867 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5868 CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5869 CHKERRQ(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5870 CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5871 CHKERRQ(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5872 CHKERRQ(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5873 if (nullsp) { 5874 CHKERRQ(MatNullSpaceRemove(nullsp,sol)); 5875 } 5876 } else { 5877 CHKERRQ(MatGetNullSpace(coarse_mat,&nullsp)); 5878 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5879 PC coarse_pc; 5880 5881 if (nullsp) { 5882 CHKERRQ(MatNullSpaceRemove(nullsp,rhs)); 5883 } 5884 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5885 CHKERRQ(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5886 CHKERRQ(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5887 CHKERRQ(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5888 } else { 5889 CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5890 CHKERRQ(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5891 CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5892 CHKERRQ(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5893 if (nullsp) { 5894 CHKERRQ(MatNullSpaceRemove(nullsp,sol)); 5895 } 5896 } 5897 } 5898 /* we don't need the benign correction at coarser levels anymore */ 5899 if (pcbddc->benign_have_null && isbddc) { 5900 PC coarse_pc; 5901 PC_BDDC* coarsepcbddc; 5902 5903 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5904 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5905 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5906 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5907 } 5908 } 5909 5910 /* Local solution on R nodes */ 5911 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5912 CHKERRQ(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5913 } 5914 /* communications from coarse sol to local primal nodes */ 5915 CHKERRQ(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5916 CHKERRQ(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5917 5918 /* Sum contributions from the two levels */ 5919 if (!pcbddc->benign_apply_coarse_only) { 5920 if (applytranspose) { 5921 CHKERRQ(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5922 if (pcbddc->switch_static) CHKERRQ(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5923 } else { 5924 CHKERRQ(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5925 if (pcbddc->switch_static) CHKERRQ(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5926 } 5927 /* store p0 */ 5928 if (pcbddc->benign_n) { 5929 PetscScalar *array; 5930 PetscInt j; 5931 5932 CHKERRQ(VecGetArray(pcbddc->vec1_P,&array)); 5933 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5934 CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array)); 5935 } 5936 } else { /* expand the coarse solution */ 5937 if (applytranspose) { 5938 CHKERRQ(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5939 } else { 5940 CHKERRQ(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5941 } 5942 } 5943 PetscFunctionReturn(0); 5944 } 5945 5946 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5947 { 5948 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5949 Vec from,to; 5950 const PetscScalar *array; 5951 5952 PetscFunctionBegin; 5953 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5954 from = pcbddc->coarse_vec; 5955 to = pcbddc->vec1_P; 5956 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5957 Vec tvec; 5958 5959 CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5960 CHKERRQ(VecResetArray(tvec)); 5961 CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5962 CHKERRQ(VecGetArrayRead(tvec,&array)); 5963 CHKERRQ(VecPlaceArray(from,array)); 5964 CHKERRQ(VecRestoreArrayRead(tvec,&array)); 5965 } 5966 } else { /* from local to global -> put data in coarse right hand side */ 5967 from = pcbddc->vec1_P; 5968 to = pcbddc->coarse_vec; 5969 } 5970 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5971 PetscFunctionReturn(0); 5972 } 5973 5974 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5975 { 5976 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5977 Vec from,to; 5978 const PetscScalar *array; 5979 5980 PetscFunctionBegin; 5981 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5982 from = pcbddc->coarse_vec; 5983 to = pcbddc->vec1_P; 5984 } else { /* from local to global -> put data in coarse right hand side */ 5985 from = pcbddc->vec1_P; 5986 to = pcbddc->coarse_vec; 5987 } 5988 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5989 if (smode == SCATTER_FORWARD) { 5990 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5991 Vec tvec; 5992 5993 CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5994 CHKERRQ(VecGetArrayRead(to,&array)); 5995 CHKERRQ(VecPlaceArray(tvec,array)); 5996 CHKERRQ(VecRestoreArrayRead(to,&array)); 5997 } 5998 } else { 5999 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6000 CHKERRQ(VecResetArray(from)); 6001 } 6002 } 6003 PetscFunctionReturn(0); 6004 } 6005 6006 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6007 { 6008 PetscErrorCode ierr; 6009 PC_IS* pcis = (PC_IS*)(pc->data); 6010 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6011 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6012 /* one and zero */ 6013 PetscScalar one=1.0,zero=0.0; 6014 /* space to store constraints and their local indices */ 6015 PetscScalar *constraints_data; 6016 PetscInt *constraints_idxs,*constraints_idxs_B; 6017 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6018 PetscInt *constraints_n; 6019 /* iterators */ 6020 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6021 /* BLAS integers */ 6022 PetscBLASInt lwork,lierr; 6023 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6024 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6025 /* reuse */ 6026 PetscInt olocal_primal_size,olocal_primal_size_cc; 6027 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6028 /* change of basis */ 6029 PetscBool qr_needed; 6030 PetscBT change_basis,qr_needed_idx; 6031 /* auxiliary stuff */ 6032 PetscInt *nnz,*is_indices; 6033 PetscInt ncc; 6034 /* some quantities */ 6035 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6036 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6037 PetscReal tol; /* tolerance for retaining eigenmodes */ 6038 6039 PetscFunctionBegin; 6040 tol = PetscSqrtReal(PETSC_SMALL); 6041 /* Destroy Mat objects computed previously */ 6042 CHKERRQ(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6043 CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix)); 6044 CHKERRQ(MatDestroy(&pcbddc->switch_static_change)); 6045 /* save info on constraints from previous setup (if any) */ 6046 olocal_primal_size = pcbddc->local_primal_size; 6047 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6048 CHKERRQ(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6049 CHKERRQ(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6050 CHKERRQ(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6051 CHKERRQ(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6052 CHKERRQ(PetscFree(pcbddc->primal_indices_local_idxs)); 6053 6054 if (!pcbddc->adaptive_selection) { 6055 IS ISForVertices,*ISForFaces,*ISForEdges; 6056 MatNullSpace nearnullsp; 6057 const Vec *nearnullvecs; 6058 Vec *localnearnullsp; 6059 PetscScalar *array; 6060 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6061 PetscBool nnsp_has_cnst; 6062 /* LAPACK working arrays for SVD or POD */ 6063 PetscBool skip_lapack,boolforchange; 6064 PetscScalar *work; 6065 PetscReal *singular_vals; 6066 #if defined(PETSC_USE_COMPLEX) 6067 PetscReal *rwork; 6068 #endif 6069 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6070 PetscBLASInt dummy_int=1; 6071 PetscScalar dummy_scalar=1.; 6072 PetscBool use_pod = PETSC_FALSE; 6073 6074 /* MKL SVD with same input gives different results on different processes! */ 6075 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6076 use_pod = PETSC_TRUE; 6077 #endif 6078 /* Get index sets for faces, edges and vertices from graph */ 6079 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6080 /* print some info */ 6081 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6082 PetscInt nv; 6083 6084 CHKERRQ(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6085 CHKERRQ(ISGetSize(ISForVertices,&nv)); 6086 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6087 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6088 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 6089 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6090 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6091 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 6092 CHKERRQ(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6093 } 6094 6095 /* free unneeded index sets */ 6096 if (!pcbddc->use_vertices) { 6097 CHKERRQ(ISDestroy(&ISForVertices)); 6098 } 6099 if (!pcbddc->use_edges) { 6100 for (i=0;i<n_ISForEdges;i++) { 6101 CHKERRQ(ISDestroy(&ISForEdges[i])); 6102 } 6103 CHKERRQ(PetscFree(ISForEdges)); 6104 n_ISForEdges = 0; 6105 } 6106 if (!pcbddc->use_faces) { 6107 for (i=0;i<n_ISForFaces;i++) { 6108 CHKERRQ(ISDestroy(&ISForFaces[i])); 6109 } 6110 CHKERRQ(PetscFree(ISForFaces)); 6111 n_ISForFaces = 0; 6112 } 6113 6114 /* check if near null space is attached to global mat */ 6115 if (pcbddc->use_nnsp) { 6116 CHKERRQ(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6117 } else nearnullsp = NULL; 6118 6119 if (nearnullsp) { 6120 CHKERRQ(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6121 /* remove any stored info */ 6122 CHKERRQ(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6123 CHKERRQ(PetscFree(pcbddc->onearnullvecs_state)); 6124 /* store information for BDDC solver reuse */ 6125 CHKERRQ(PetscObjectReference((PetscObject)nearnullsp)); 6126 pcbddc->onearnullspace = nearnullsp; 6127 CHKERRQ(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6128 for (i=0;i<nnsp_size;i++) { 6129 CHKERRQ(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6130 } 6131 } else { /* if near null space is not provided BDDC uses constants by default */ 6132 nnsp_size = 0; 6133 nnsp_has_cnst = PETSC_TRUE; 6134 } 6135 /* get max number of constraints on a single cc */ 6136 max_constraints = nnsp_size; 6137 if (nnsp_has_cnst) max_constraints++; 6138 6139 /* 6140 Evaluate maximum storage size needed by the procedure 6141 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6142 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6143 There can be multiple constraints per connected component 6144 */ 6145 n_vertices = 0; 6146 if (ISForVertices) { 6147 CHKERRQ(ISGetSize(ISForVertices,&n_vertices)); 6148 } 6149 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6150 CHKERRQ(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6151 6152 total_counts = n_ISForFaces+n_ISForEdges; 6153 total_counts *= max_constraints; 6154 total_counts += n_vertices; 6155 CHKERRQ(PetscBTCreate(total_counts,&change_basis)); 6156 6157 total_counts = 0; 6158 max_size_of_constraint = 0; 6159 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6160 IS used_is; 6161 if (i<n_ISForEdges) { 6162 used_is = ISForEdges[i]; 6163 } else { 6164 used_is = ISForFaces[i-n_ISForEdges]; 6165 } 6166 CHKERRQ(ISGetSize(used_is,&j)); 6167 total_counts += j; 6168 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6169 } 6170 CHKERRQ(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6171 6172 /* get local part of global near null space vectors */ 6173 CHKERRQ(PetscMalloc1(nnsp_size,&localnearnullsp)); 6174 for (k=0;k<nnsp_size;k++) { 6175 CHKERRQ(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6176 CHKERRQ(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6177 CHKERRQ(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6178 } 6179 6180 /* whether or not to skip lapack calls */ 6181 skip_lapack = PETSC_TRUE; 6182 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6183 6184 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6185 if (!skip_lapack) { 6186 PetscScalar temp_work; 6187 6188 if (use_pod) { 6189 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6190 CHKERRQ(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6191 CHKERRQ(PetscMalloc1(max_constraints,&singular_vals)); 6192 CHKERRQ(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6193 #if defined(PETSC_USE_COMPLEX) 6194 CHKERRQ(PetscMalloc1(3*max_constraints,&rwork)); 6195 #endif 6196 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6197 CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_N)); 6198 CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6199 lwork = -1; 6200 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6201 #if !defined(PETSC_USE_COMPLEX) 6202 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6203 #else 6204 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6205 #endif 6206 CHKERRQ(PetscFPTrapPop()); 6207 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6208 } else { 6209 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6210 /* SVD */ 6211 PetscInt max_n,min_n; 6212 max_n = max_size_of_constraint; 6213 min_n = max_constraints; 6214 if (max_size_of_constraint < max_constraints) { 6215 min_n = max_size_of_constraint; 6216 max_n = max_constraints; 6217 } 6218 CHKERRQ(PetscMalloc1(min_n,&singular_vals)); 6219 #if defined(PETSC_USE_COMPLEX) 6220 CHKERRQ(PetscMalloc1(5*min_n,&rwork)); 6221 #endif 6222 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6223 lwork = -1; 6224 CHKERRQ(PetscBLASIntCast(max_n,&Blas_M)); 6225 CHKERRQ(PetscBLASIntCast(min_n,&Blas_N)); 6226 CHKERRQ(PetscBLASIntCast(max_n,&Blas_LDA)); 6227 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6228 #if !defined(PETSC_USE_COMPLEX) 6229 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6230 #else 6231 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6232 #endif 6233 CHKERRQ(PetscFPTrapPop()); 6234 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6235 #else 6236 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6237 #endif /* on missing GESVD */ 6238 } 6239 /* Allocate optimal workspace */ 6240 CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6241 CHKERRQ(PetscMalloc1(lwork,&work)); 6242 } 6243 /* Now we can loop on constraining sets */ 6244 total_counts = 0; 6245 constraints_idxs_ptr[0] = 0; 6246 constraints_data_ptr[0] = 0; 6247 /* vertices */ 6248 if (n_vertices) { 6249 CHKERRQ(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6250 CHKERRQ(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6251 for (i=0;i<n_vertices;i++) { 6252 constraints_n[total_counts] = 1; 6253 constraints_data[total_counts] = 1.0; 6254 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6255 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6256 total_counts++; 6257 } 6258 CHKERRQ(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6259 n_vertices = total_counts; 6260 } 6261 6262 /* edges and faces */ 6263 total_counts_cc = total_counts; 6264 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6265 IS used_is; 6266 PetscBool idxs_copied = PETSC_FALSE; 6267 6268 if (ncc<n_ISForEdges) { 6269 used_is = ISForEdges[ncc]; 6270 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6271 } else { 6272 used_is = ISForFaces[ncc-n_ISForEdges]; 6273 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6274 } 6275 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6276 6277 CHKERRQ(ISGetSize(used_is,&size_of_constraint)); 6278 CHKERRQ(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6279 /* change of basis should not be performed on local periodic nodes */ 6280 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6281 if (nnsp_has_cnst) { 6282 PetscScalar quad_value; 6283 6284 CHKERRQ(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6285 idxs_copied = PETSC_TRUE; 6286 6287 if (!pcbddc->use_nnsp_true) { 6288 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6289 } else { 6290 quad_value = 1.0; 6291 } 6292 for (j=0;j<size_of_constraint;j++) { 6293 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6294 } 6295 temp_constraints++; 6296 total_counts++; 6297 } 6298 for (k=0;k<nnsp_size;k++) { 6299 PetscReal real_value; 6300 PetscScalar *ptr_to_data; 6301 6302 CHKERRQ(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6303 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6304 for (j=0;j<size_of_constraint;j++) { 6305 ptr_to_data[j] = array[is_indices[j]]; 6306 } 6307 CHKERRQ(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6308 /* check if array is null on the connected component */ 6309 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6310 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6311 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6312 temp_constraints++; 6313 total_counts++; 6314 if (!idxs_copied) { 6315 CHKERRQ(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6316 idxs_copied = PETSC_TRUE; 6317 } 6318 } 6319 } 6320 CHKERRQ(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6321 valid_constraints = temp_constraints; 6322 if (!pcbddc->use_nnsp_true && temp_constraints) { 6323 if (temp_constraints == 1) { /* just normalize the constraint */ 6324 PetscScalar norm,*ptr_to_data; 6325 6326 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6327 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6328 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6329 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6330 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6331 } else { /* perform SVD */ 6332 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6333 6334 if (use_pod) { 6335 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6336 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6337 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6338 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6339 from that computed using LAPACKgesvd 6340 -> This is due to a different computation of eigenvectors in LAPACKheev 6341 -> The quality of the POD-computed basis will be the same */ 6342 CHKERRQ(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6343 /* Store upper triangular part of correlation matrix */ 6344 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6345 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6346 for (j=0;j<temp_constraints;j++) { 6347 for (k=0;k<j+1;k++) { 6348 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6349 } 6350 } 6351 /* compute eigenvalues and eigenvectors of correlation matrix */ 6352 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N)); 6353 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6354 #if !defined(PETSC_USE_COMPLEX) 6355 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6356 #else 6357 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6358 #endif 6359 CHKERRQ(PetscFPTrapPop()); 6360 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6361 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6362 j = 0; 6363 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6364 total_counts = total_counts-j; 6365 valid_constraints = temp_constraints-j; 6366 /* scale and copy POD basis into used quadrature memory */ 6367 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6368 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N)); 6369 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_K)); 6370 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6371 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6372 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6373 if (j<temp_constraints) { 6374 PetscInt ii; 6375 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6376 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6377 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6378 CHKERRQ(PetscFPTrapPop()); 6379 for (k=0;k<temp_constraints-j;k++) { 6380 for (ii=0;ii<size_of_constraint;ii++) { 6381 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6382 } 6383 } 6384 } 6385 } else { 6386 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6387 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6388 CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N)); 6389 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6390 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6391 #if !defined(PETSC_USE_COMPLEX) 6392 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6393 #else 6394 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6395 #endif 6396 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6397 CHKERRQ(PetscFPTrapPop()); 6398 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6399 k = temp_constraints; 6400 if (k > size_of_constraint) k = size_of_constraint; 6401 j = 0; 6402 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6403 valid_constraints = k-j; 6404 total_counts = total_counts-temp_constraints+valid_constraints; 6405 #else 6406 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6407 #endif /* on missing GESVD */ 6408 } 6409 } 6410 } 6411 /* update pointers information */ 6412 if (valid_constraints) { 6413 constraints_n[total_counts_cc] = valid_constraints; 6414 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6415 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6416 /* set change_of_basis flag */ 6417 if (boolforchange) { 6418 PetscBTSet(change_basis,total_counts_cc); 6419 } 6420 total_counts_cc++; 6421 } 6422 } 6423 /* free workspace */ 6424 if (!skip_lapack) { 6425 CHKERRQ(PetscFree(work)); 6426 #if defined(PETSC_USE_COMPLEX) 6427 CHKERRQ(PetscFree(rwork)); 6428 #endif 6429 CHKERRQ(PetscFree(singular_vals)); 6430 CHKERRQ(PetscFree(correlation_mat)); 6431 CHKERRQ(PetscFree(temp_basis)); 6432 } 6433 for (k=0;k<nnsp_size;k++) { 6434 CHKERRQ(VecDestroy(&localnearnullsp[k])); 6435 } 6436 CHKERRQ(PetscFree(localnearnullsp)); 6437 /* free index sets of faces, edges and vertices */ 6438 for (i=0;i<n_ISForFaces;i++) { 6439 CHKERRQ(ISDestroy(&ISForFaces[i])); 6440 } 6441 if (n_ISForFaces) { 6442 CHKERRQ(PetscFree(ISForFaces)); 6443 } 6444 for (i=0;i<n_ISForEdges;i++) { 6445 CHKERRQ(ISDestroy(&ISForEdges[i])); 6446 } 6447 if (n_ISForEdges) { 6448 CHKERRQ(PetscFree(ISForEdges)); 6449 } 6450 CHKERRQ(ISDestroy(&ISForVertices)); 6451 } else { 6452 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6453 6454 total_counts = 0; 6455 n_vertices = 0; 6456 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6457 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6458 } 6459 max_constraints = 0; 6460 total_counts_cc = 0; 6461 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6462 total_counts += pcbddc->adaptive_constraints_n[i]; 6463 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6464 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6465 } 6466 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6467 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6468 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6469 constraints_data = pcbddc->adaptive_constraints_data; 6470 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6471 CHKERRQ(PetscMalloc1(total_counts_cc,&constraints_n)); 6472 total_counts_cc = 0; 6473 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6474 if (pcbddc->adaptive_constraints_n[i]) { 6475 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6476 } 6477 } 6478 6479 max_size_of_constraint = 0; 6480 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6481 CHKERRQ(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6482 /* Change of basis */ 6483 CHKERRQ(PetscBTCreate(total_counts_cc,&change_basis)); 6484 if (pcbddc->use_change_of_basis) { 6485 for (i=0;i<sub_schurs->n_subs;i++) { 6486 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6487 CHKERRQ(PetscBTSet(change_basis,i+n_vertices)); 6488 } 6489 } 6490 } 6491 } 6492 pcbddc->local_primal_size = total_counts; 6493 CHKERRQ(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6494 6495 /* map constraints_idxs in boundary numbering */ 6496 CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6497 PetscCheckFalse(i != constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6498 6499 /* Create constraint matrix */ 6500 CHKERRQ(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6501 CHKERRQ(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6502 CHKERRQ(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6503 6504 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6505 /* determine if a QR strategy is needed for change of basis */ 6506 qr_needed = pcbddc->use_qr_single; 6507 CHKERRQ(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6508 total_primal_vertices=0; 6509 pcbddc->local_primal_size_cc = 0; 6510 for (i=0;i<total_counts_cc;i++) { 6511 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6512 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6513 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6514 pcbddc->local_primal_size_cc += 1; 6515 } else if (PetscBTLookup(change_basis,i)) { 6516 for (k=0;k<constraints_n[i];k++) { 6517 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6518 } 6519 pcbddc->local_primal_size_cc += constraints_n[i]; 6520 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6521 PetscBTSet(qr_needed_idx,i); 6522 qr_needed = PETSC_TRUE; 6523 } 6524 } else { 6525 pcbddc->local_primal_size_cc += 1; 6526 } 6527 } 6528 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6529 pcbddc->n_vertices = total_primal_vertices; 6530 /* permute indices in order to have a sorted set of vertices */ 6531 CHKERRQ(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6532 CHKERRQ(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6533 CHKERRQ(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6534 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6535 6536 /* nonzero structure of constraint matrix */ 6537 /* and get reference dof for local constraints */ 6538 CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6539 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6540 6541 j = total_primal_vertices; 6542 total_counts = total_primal_vertices; 6543 cum = total_primal_vertices; 6544 for (i=n_vertices;i<total_counts_cc;i++) { 6545 if (!PetscBTLookup(change_basis,i)) { 6546 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6547 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6548 cum++; 6549 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6550 for (k=0;k<constraints_n[i];k++) { 6551 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6552 nnz[j+k] = size_of_constraint; 6553 } 6554 j += constraints_n[i]; 6555 } 6556 } 6557 CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6558 CHKERRQ(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6559 CHKERRQ(PetscFree(nnz)); 6560 6561 /* set values in constraint matrix */ 6562 for (i=0;i<total_primal_vertices;i++) { 6563 CHKERRQ(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6564 } 6565 total_counts = total_primal_vertices; 6566 for (i=n_vertices;i<total_counts_cc;i++) { 6567 if (!PetscBTLookup(change_basis,i)) { 6568 PetscInt *cols; 6569 6570 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6571 cols = constraints_idxs+constraints_idxs_ptr[i]; 6572 for (k=0;k<constraints_n[i];k++) { 6573 PetscInt row = total_counts+k; 6574 PetscScalar *vals; 6575 6576 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6577 CHKERRQ(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6578 } 6579 total_counts += constraints_n[i]; 6580 } 6581 } 6582 /* assembling */ 6583 CHKERRQ(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6584 CHKERRQ(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6585 CHKERRQ(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6586 6587 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6588 if (pcbddc->use_change_of_basis) { 6589 /* dual and primal dofs on a single cc */ 6590 PetscInt dual_dofs,primal_dofs; 6591 /* working stuff for GEQRF */ 6592 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6593 PetscBLASInt lqr_work; 6594 /* working stuff for UNGQR */ 6595 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6596 PetscBLASInt lgqr_work; 6597 /* working stuff for TRTRS */ 6598 PetscScalar *trs_rhs = NULL; 6599 PetscBLASInt Blas_NRHS; 6600 /* pointers for values insertion into change of basis matrix */ 6601 PetscInt *start_rows,*start_cols; 6602 PetscScalar *start_vals; 6603 /* working stuff for values insertion */ 6604 PetscBT is_primal; 6605 PetscInt *aux_primal_numbering_B; 6606 /* matrix sizes */ 6607 PetscInt global_size,local_size; 6608 /* temporary change of basis */ 6609 Mat localChangeOfBasisMatrix; 6610 /* extra space for debugging */ 6611 PetscScalar *dbg_work = NULL; 6612 6613 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6614 CHKERRQ(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6615 CHKERRQ(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6616 CHKERRQ(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6617 /* nonzeros for local mat */ 6618 CHKERRQ(PetscMalloc1(pcis->n,&nnz)); 6619 if (!pcbddc->benign_change || pcbddc->fake_change) { 6620 for (i=0;i<pcis->n;i++) nnz[i]=1; 6621 } else { 6622 const PetscInt *ii; 6623 PetscInt n; 6624 PetscBool flg_row; 6625 CHKERRQ(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6626 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6627 CHKERRQ(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6628 } 6629 for (i=n_vertices;i<total_counts_cc;i++) { 6630 if (PetscBTLookup(change_basis,i)) { 6631 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6632 if (PetscBTLookup(qr_needed_idx,i)) { 6633 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6634 } else { 6635 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6636 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6637 } 6638 } 6639 } 6640 CHKERRQ(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6641 CHKERRQ(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6642 CHKERRQ(PetscFree(nnz)); 6643 /* Set interior change in the matrix */ 6644 if (!pcbddc->benign_change || pcbddc->fake_change) { 6645 for (i=0;i<pcis->n;i++) { 6646 CHKERRQ(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6647 } 6648 } else { 6649 const PetscInt *ii,*jj; 6650 PetscScalar *aa; 6651 PetscInt n; 6652 PetscBool flg_row; 6653 CHKERRQ(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6654 CHKERRQ(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6655 for (i=0;i<n;i++) { 6656 CHKERRQ(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6657 } 6658 CHKERRQ(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6659 CHKERRQ(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6660 } 6661 6662 if (pcbddc->dbg_flag) { 6663 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6664 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6665 } 6666 6667 /* Now we loop on the constraints which need a change of basis */ 6668 /* 6669 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6670 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6671 6672 Basic blocks of change of basis matrix T computed by 6673 6674 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6675 6676 | 1 0 ... 0 s_1/S | 6677 | 0 1 ... 0 s_2/S | 6678 | ... | 6679 | 0 ... 1 s_{n-1}/S | 6680 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6681 6682 with S = \sum_{i=1}^n s_i^2 6683 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6684 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6685 6686 - QR decomposition of constraints otherwise 6687 */ 6688 if (qr_needed && max_size_of_constraint) { 6689 /* space to store Q */ 6690 CHKERRQ(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6691 /* array to store scaling factors for reflectors */ 6692 CHKERRQ(PetscMalloc1(max_constraints,&qr_tau)); 6693 /* first we issue queries for optimal work */ 6694 CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6695 CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_N)); 6696 CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6697 lqr_work = -1; 6698 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6699 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6700 CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6701 CHKERRQ(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6702 lgqr_work = -1; 6703 CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6704 CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6705 CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_K)); 6706 CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6707 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6708 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6709 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6710 CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6711 CHKERRQ(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6712 /* array to store rhs and solution of triangular solver */ 6713 CHKERRQ(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6714 /* allocating workspace for check */ 6715 if (pcbddc->dbg_flag) { 6716 CHKERRQ(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6717 } 6718 } 6719 /* array to store whether a node is primal or not */ 6720 CHKERRQ(PetscBTCreate(pcis->n_B,&is_primal)); 6721 CHKERRQ(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6722 CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6723 PetscCheckFalse(i != total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6724 for (i=0;i<total_primal_vertices;i++) { 6725 CHKERRQ(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6726 } 6727 CHKERRQ(PetscFree(aux_primal_numbering_B)); 6728 6729 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6730 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6731 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6732 if (PetscBTLookup(change_basis,total_counts)) { 6733 /* get constraint info */ 6734 primal_dofs = constraints_n[total_counts]; 6735 dual_dofs = size_of_constraint-primal_dofs; 6736 6737 if (pcbddc->dbg_flag) { 6738 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint)); 6739 } 6740 6741 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6742 6743 /* copy quadrature constraints for change of basis check */ 6744 if (pcbddc->dbg_flag) { 6745 CHKERRQ(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6746 } 6747 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6748 CHKERRQ(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6749 6750 /* compute QR decomposition of constraints */ 6751 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6752 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N)); 6753 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6754 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6755 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6756 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6757 CHKERRQ(PetscFPTrapPop()); 6758 6759 /* explicitly compute R^-T */ 6760 CHKERRQ(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6761 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6762 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N)); 6763 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6764 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6765 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6766 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6767 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6768 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6769 CHKERRQ(PetscFPTrapPop()); 6770 6771 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6772 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6773 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6774 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_K)); 6775 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6776 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6777 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6778 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6779 CHKERRQ(PetscFPTrapPop()); 6780 6781 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6782 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6783 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6784 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6785 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N)); 6786 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_K)); 6787 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6788 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6789 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6790 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6791 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6792 CHKERRQ(PetscFPTrapPop()); 6793 CHKERRQ(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6794 6795 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6796 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6797 /* insert cols for primal dofs */ 6798 for (j=0;j<primal_dofs;j++) { 6799 start_vals = &qr_basis[j*size_of_constraint]; 6800 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6801 CHKERRQ(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6802 } 6803 /* insert cols for dual dofs */ 6804 for (j=0,k=0;j<dual_dofs;k++) { 6805 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6806 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6807 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6808 CHKERRQ(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6809 j++; 6810 } 6811 } 6812 6813 /* check change of basis */ 6814 if (pcbddc->dbg_flag) { 6815 PetscInt ii,jj; 6816 PetscBool valid_qr=PETSC_TRUE; 6817 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_M)); 6818 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6819 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6820 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6821 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6822 CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6823 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6824 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6825 CHKERRQ(PetscFPTrapPop()); 6826 for (jj=0;jj<size_of_constraint;jj++) { 6827 for (ii=0;ii<primal_dofs;ii++) { 6828 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6829 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6830 } 6831 } 6832 if (!valid_qr) { 6833 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6834 for (jj=0;jj<size_of_constraint;jj++) { 6835 for (ii=0;ii<primal_dofs;ii++) { 6836 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6837 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6838 } 6839 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6840 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6841 } 6842 } 6843 } 6844 } else { 6845 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6846 } 6847 } 6848 } else { /* simple transformation block */ 6849 PetscInt row,col; 6850 PetscScalar val,norm; 6851 6852 CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6853 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6854 for (j=0;j<size_of_constraint;j++) { 6855 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6856 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6857 if (!PetscBTLookup(is_primal,row_B)) { 6858 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6859 CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6860 CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6861 } else { 6862 for (k=0;k<size_of_constraint;k++) { 6863 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6864 if (row != col) { 6865 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6866 } else { 6867 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6868 } 6869 CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6870 } 6871 } 6872 } 6873 if (pcbddc->dbg_flag) { 6874 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6875 } 6876 } 6877 } else { 6878 if (pcbddc->dbg_flag) { 6879 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint)); 6880 } 6881 } 6882 } 6883 6884 /* free workspace */ 6885 if (qr_needed) { 6886 if (pcbddc->dbg_flag) { 6887 CHKERRQ(PetscFree(dbg_work)); 6888 } 6889 CHKERRQ(PetscFree(trs_rhs)); 6890 CHKERRQ(PetscFree(qr_tau)); 6891 CHKERRQ(PetscFree(qr_work)); 6892 CHKERRQ(PetscFree(gqr_work)); 6893 CHKERRQ(PetscFree(qr_basis)); 6894 } 6895 CHKERRQ(PetscBTDestroy(&is_primal)); 6896 CHKERRQ(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6897 CHKERRQ(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6898 6899 /* assembling of global change of variable */ 6900 if (!pcbddc->fake_change) { 6901 Mat tmat; 6902 PetscInt bs; 6903 6904 CHKERRQ(VecGetSize(pcis->vec1_global,&global_size)); 6905 CHKERRQ(VecGetLocalSize(pcis->vec1_global,&local_size)); 6906 CHKERRQ(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6907 CHKERRQ(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6908 CHKERRQ(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6909 CHKERRQ(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6910 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6911 CHKERRQ(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6912 CHKERRQ(MatGetBlockSize(pc->pmat,&bs)); 6913 CHKERRQ(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6914 CHKERRQ(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6915 CHKERRQ(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6916 CHKERRQ(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6917 CHKERRQ(MatDestroy(&tmat)); 6918 CHKERRQ(VecSet(pcis->vec1_global,0.0)); 6919 CHKERRQ(VecSet(pcis->vec1_N,1.0)); 6920 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6921 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6922 CHKERRQ(VecReciprocal(pcis->vec1_global)); 6923 CHKERRQ(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6924 6925 /* check */ 6926 if (pcbddc->dbg_flag) { 6927 PetscReal error; 6928 Vec x,x_change; 6929 6930 CHKERRQ(VecDuplicate(pcis->vec1_global,&x)); 6931 CHKERRQ(VecDuplicate(pcis->vec1_global,&x_change)); 6932 CHKERRQ(VecSetRandom(x,NULL)); 6933 CHKERRQ(VecCopy(x,pcis->vec1_global)); 6934 CHKERRQ(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6935 CHKERRQ(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6936 CHKERRQ(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6937 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6938 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6939 CHKERRQ(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6940 CHKERRQ(VecAXPY(x,-1.0,x_change)); 6941 CHKERRQ(VecNorm(x,NORM_INFINITY,&error)); 6942 if (error > PETSC_SMALL) { 6943 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6944 } 6945 CHKERRQ(VecDestroy(&x)); 6946 CHKERRQ(VecDestroy(&x_change)); 6947 } 6948 /* adapt sub_schurs computed (if any) */ 6949 if (pcbddc->use_deluxe_scaling) { 6950 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6951 6952 PetscCheckFalse(pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6953 if (sub_schurs && sub_schurs->S_Ej_all) { 6954 Mat S_new,tmat; 6955 IS is_all_N,is_V_Sall = NULL; 6956 6957 CHKERRQ(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6958 CHKERRQ(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6959 if (pcbddc->deluxe_zerorows) { 6960 ISLocalToGlobalMapping NtoSall; 6961 IS is_V; 6962 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6963 CHKERRQ(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6964 CHKERRQ(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6965 CHKERRQ(ISLocalToGlobalMappingDestroy(&NtoSall)); 6966 CHKERRQ(ISDestroy(&is_V)); 6967 } 6968 CHKERRQ(ISDestroy(&is_all_N)); 6969 CHKERRQ(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6970 CHKERRQ(MatDestroy(&sub_schurs->S_Ej_all)); 6971 CHKERRQ(PetscObjectReference((PetscObject)S_new)); 6972 if (pcbddc->deluxe_zerorows) { 6973 const PetscScalar *array; 6974 const PetscInt *idxs_V,*idxs_all; 6975 PetscInt i,n_V; 6976 6977 CHKERRQ(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6978 CHKERRQ(ISGetLocalSize(is_V_Sall,&n_V)); 6979 CHKERRQ(ISGetIndices(is_V_Sall,&idxs_V)); 6980 CHKERRQ(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6981 CHKERRQ(VecGetArrayRead(pcis->D,&array)); 6982 for (i=0;i<n_V;i++) { 6983 PetscScalar val; 6984 PetscInt idx; 6985 6986 idx = idxs_V[i]; 6987 val = array[idxs_all[idxs_V[i]]]; 6988 CHKERRQ(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6989 } 6990 CHKERRQ(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 6991 CHKERRQ(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 6992 CHKERRQ(VecRestoreArrayRead(pcis->D,&array)); 6993 CHKERRQ(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 6994 CHKERRQ(ISRestoreIndices(is_V_Sall,&idxs_V)); 6995 } 6996 sub_schurs->S_Ej_all = S_new; 6997 CHKERRQ(MatDestroy(&S_new)); 6998 if (sub_schurs->sum_S_Ej_all) { 6999 CHKERRQ(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 7000 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7001 CHKERRQ(PetscObjectReference((PetscObject)S_new)); 7002 if (pcbddc->deluxe_zerorows) { 7003 CHKERRQ(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 7004 } 7005 sub_schurs->sum_S_Ej_all = S_new; 7006 CHKERRQ(MatDestroy(&S_new)); 7007 } 7008 CHKERRQ(ISDestroy(&is_V_Sall)); 7009 CHKERRQ(MatDestroy(&tmat)); 7010 } 7011 /* destroy any change of basis context in sub_schurs */ 7012 if (sub_schurs && sub_schurs->change) { 7013 PetscInt i; 7014 7015 for (i=0;i<sub_schurs->n_subs;i++) { 7016 CHKERRQ(KSPDestroy(&sub_schurs->change[i])); 7017 } 7018 CHKERRQ(PetscFree(sub_schurs->change)); 7019 } 7020 } 7021 if (pcbddc->switch_static) { /* need to save the local change */ 7022 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7023 } else { 7024 CHKERRQ(MatDestroy(&localChangeOfBasisMatrix)); 7025 } 7026 /* determine if any process has changed the pressures locally */ 7027 pcbddc->change_interior = pcbddc->benign_have_null; 7028 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7029 CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix)); 7030 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7031 pcbddc->use_qr_single = qr_needed; 7032 } 7033 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7034 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7035 CHKERRQ(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7036 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7037 } else { 7038 Mat benign_global = NULL; 7039 if (pcbddc->benign_have_null) { 7040 Mat M; 7041 7042 pcbddc->change_interior = PETSC_TRUE; 7043 CHKERRQ(VecCopy(matis->counter,pcis->vec1_N)); 7044 CHKERRQ(VecReciprocal(pcis->vec1_N)); 7045 CHKERRQ(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7046 if (pcbddc->benign_change) { 7047 CHKERRQ(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7048 CHKERRQ(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7049 } else { 7050 CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7051 CHKERRQ(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7052 } 7053 CHKERRQ(MatISSetLocalMat(benign_global,M)); 7054 CHKERRQ(MatDestroy(&M)); 7055 CHKERRQ(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7056 CHKERRQ(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7057 } 7058 if (pcbddc->user_ChangeOfBasisMatrix) { 7059 CHKERRQ(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7060 CHKERRQ(MatDestroy(&benign_global)); 7061 } else if (pcbddc->benign_have_null) { 7062 pcbddc->ChangeOfBasisMatrix = benign_global; 7063 } 7064 } 7065 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7066 IS is_global; 7067 const PetscInt *gidxs; 7068 7069 CHKERRQ(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7070 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7071 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7072 CHKERRQ(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7073 CHKERRQ(ISDestroy(&is_global)); 7074 } 7075 } 7076 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7077 CHKERRQ(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7078 } 7079 7080 if (!pcbddc->fake_change) { 7081 /* add pressure dofs to set of primal nodes for numbering purposes */ 7082 for (i=0;i<pcbddc->benign_n;i++) { 7083 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7084 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7085 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7086 pcbddc->local_primal_size_cc++; 7087 pcbddc->local_primal_size++; 7088 } 7089 7090 /* check if a new primal space has been introduced (also take into account benign trick) */ 7091 pcbddc->new_primal_space_local = PETSC_TRUE; 7092 if (olocal_primal_size == pcbddc->local_primal_size) { 7093 CHKERRQ(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7094 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7095 if (!pcbddc->new_primal_space_local) { 7096 CHKERRQ(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7097 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7098 } 7099 } 7100 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7101 CHKERRMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7102 } 7103 CHKERRQ(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7104 7105 /* flush dbg viewer */ 7106 if (pcbddc->dbg_flag) { 7107 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 7108 } 7109 7110 /* free workspace */ 7111 CHKERRQ(PetscBTDestroy(&qr_needed_idx)); 7112 CHKERRQ(PetscBTDestroy(&change_basis)); 7113 if (!pcbddc->adaptive_selection) { 7114 CHKERRQ(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7115 CHKERRQ(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7116 } else { 7117 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7118 pcbddc->adaptive_constraints_idxs_ptr, 7119 pcbddc->adaptive_constraints_data_ptr, 7120 pcbddc->adaptive_constraints_idxs, 7121 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7122 CHKERRQ(PetscFree(constraints_n)); 7123 CHKERRQ(PetscFree(constraints_idxs_B)); 7124 } 7125 PetscFunctionReturn(0); 7126 } 7127 7128 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7129 { 7130 ISLocalToGlobalMapping map; 7131 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7132 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7133 PetscInt i,N; 7134 PetscBool rcsr = PETSC_FALSE; 7135 7136 PetscFunctionBegin; 7137 if (pcbddc->recompute_topography) { 7138 pcbddc->graphanalyzed = PETSC_FALSE; 7139 /* Reset previously computed graph */ 7140 CHKERRQ(PCBDDCGraphReset(pcbddc->mat_graph)); 7141 /* Init local Graph struct */ 7142 CHKERRQ(MatGetSize(pc->pmat,&N,NULL)); 7143 CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7144 CHKERRQ(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7145 7146 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7147 CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7148 } 7149 /* Check validity of the csr graph passed in by the user */ 7150 PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7151 7152 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7153 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7154 PetscInt *xadj,*adjncy; 7155 PetscInt nvtxs; 7156 PetscBool flg_row=PETSC_FALSE; 7157 7158 CHKERRQ(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7159 if (flg_row) { 7160 CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7161 pcbddc->computed_rowadj = PETSC_TRUE; 7162 } 7163 CHKERRQ(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7164 rcsr = PETSC_TRUE; 7165 } 7166 if (pcbddc->dbg_flag) { 7167 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 7168 } 7169 7170 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7171 PetscReal *lcoords; 7172 PetscInt n; 7173 MPI_Datatype dimrealtype; 7174 7175 /* TODO: support for blocked */ 7176 PetscCheckFalse(pcbddc->mat_graph->cnloc != pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7177 CHKERRQ(MatGetLocalSize(matis->A,&n,NULL)); 7178 CHKERRQ(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7179 CHKERRMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7180 CHKERRMPI(MPI_Type_commit(&dimrealtype)); 7181 CHKERRQ(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7182 CHKERRQ(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7183 CHKERRMPI(MPI_Type_free(&dimrealtype)); 7184 CHKERRQ(PetscFree(pcbddc->mat_graph->coords)); 7185 7186 pcbddc->mat_graph->coords = lcoords; 7187 pcbddc->mat_graph->cloc = PETSC_TRUE; 7188 pcbddc->mat_graph->cnloc = n; 7189 } 7190 PetscCheckFalse(pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7191 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7192 7193 /* Setup of Graph */ 7194 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7195 CHKERRQ(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7196 7197 /* attach info on disconnected subdomains if present */ 7198 if (pcbddc->n_local_subs) { 7199 PetscInt *local_subs,n,totn; 7200 7201 CHKERRQ(MatGetLocalSize(matis->A,&n,NULL)); 7202 CHKERRQ(PetscMalloc1(n,&local_subs)); 7203 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7204 for (i=0;i<pcbddc->n_local_subs;i++) { 7205 const PetscInt *idxs; 7206 PetscInt nl,j; 7207 7208 CHKERRQ(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7209 CHKERRQ(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7210 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7211 CHKERRQ(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7212 } 7213 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7214 pcbddc->mat_graph->n_local_subs = totn + 1; 7215 pcbddc->mat_graph->local_subs = local_subs; 7216 } 7217 } 7218 7219 if (!pcbddc->graphanalyzed) { 7220 /* Graph's connected components analysis */ 7221 CHKERRQ(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7222 pcbddc->graphanalyzed = PETSC_TRUE; 7223 pcbddc->corner_selected = pcbddc->corner_selection; 7224 } 7225 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7226 PetscFunctionReturn(0); 7227 } 7228 7229 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7230 { 7231 PetscInt i,j,n; 7232 PetscScalar *alphas; 7233 PetscReal norm,*onorms; 7234 7235 PetscFunctionBegin; 7236 n = *nio; 7237 if (!n) PetscFunctionReturn(0); 7238 CHKERRQ(PetscMalloc2(n,&alphas,n,&onorms)); 7239 CHKERRQ(VecNormalize(vecs[0],&norm)); 7240 if (norm < PETSC_SMALL) { 7241 onorms[0] = 0.0; 7242 CHKERRQ(VecSet(vecs[0],0.0)); 7243 } else { 7244 onorms[0] = norm; 7245 } 7246 7247 for (i=1;i<n;i++) { 7248 CHKERRQ(VecMDot(vecs[i],i,vecs,alphas)); 7249 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7250 CHKERRQ(VecMAXPY(vecs[i],i,alphas,vecs)); 7251 CHKERRQ(VecNormalize(vecs[i],&norm)); 7252 if (norm < PETSC_SMALL) { 7253 onorms[i] = 0.0; 7254 CHKERRQ(VecSet(vecs[i],0.0)); 7255 } else { 7256 onorms[i] = norm; 7257 } 7258 } 7259 /* push nonzero vectors at the beginning */ 7260 for (i=0;i<n;i++) { 7261 if (onorms[i] == 0.0) { 7262 for (j=i+1;j<n;j++) { 7263 if (onorms[j] != 0.0) { 7264 CHKERRQ(VecCopy(vecs[j],vecs[i])); 7265 onorms[j] = 0.0; 7266 } 7267 } 7268 } 7269 } 7270 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7271 CHKERRQ(PetscFree2(alphas,onorms)); 7272 PetscFunctionReturn(0); 7273 } 7274 7275 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7276 { 7277 ISLocalToGlobalMapping mapping; 7278 Mat A; 7279 PetscInt n_neighs,*neighs,*n_shared,**shared; 7280 PetscMPIInt size,rank,color; 7281 PetscInt *xadj,*adjncy; 7282 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7283 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7284 PetscInt void_procs,*procs_candidates = NULL; 7285 PetscInt xadj_count,*count; 7286 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7287 PetscSubcomm psubcomm; 7288 MPI_Comm subcomm; 7289 7290 PetscFunctionBegin; 7291 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7292 CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7293 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7294 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7295 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7296 PetscCheckFalse(*n_subdomains <=0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7297 7298 if (have_void) *have_void = PETSC_FALSE; 7299 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7300 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7301 CHKERRQ(MatISGetLocalMat(mat,&A)); 7302 CHKERRQ(MatGetLocalSize(A,&n,NULL)); 7303 im_active = !!n; 7304 CHKERRMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7305 void_procs = size - active_procs; 7306 /* get ranks of of non-active processes in mat communicator */ 7307 if (void_procs) { 7308 PetscInt ncand; 7309 7310 if (have_void) *have_void = PETSC_TRUE; 7311 CHKERRQ(PetscMalloc1(size,&procs_candidates)); 7312 CHKERRMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7313 for (i=0,ncand=0;i<size;i++) { 7314 if (!procs_candidates[i]) { 7315 procs_candidates[ncand++] = i; 7316 } 7317 } 7318 /* force n_subdomains to be not greater that the number of non-active processes */ 7319 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7320 } 7321 7322 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7323 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7324 CHKERRQ(MatGetSize(mat,&N,NULL)); 7325 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7326 PetscInt issize,isidx,dest; 7327 if (*n_subdomains == 1) dest = 0; 7328 else dest = rank; 7329 if (im_active) { 7330 issize = 1; 7331 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7332 isidx = procs_candidates[dest]; 7333 } else { 7334 isidx = dest; 7335 } 7336 } else { 7337 issize = 0; 7338 isidx = -1; 7339 } 7340 if (*n_subdomains != 1) *n_subdomains = active_procs; 7341 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7342 CHKERRQ(PetscFree(procs_candidates)); 7343 PetscFunctionReturn(0); 7344 } 7345 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7346 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7347 threshold = PetscMax(threshold,2); 7348 7349 /* Get info on mapping */ 7350 CHKERRQ(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7351 CHKERRQ(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7352 7353 /* build local CSR graph of subdomains' connectivity */ 7354 CHKERRQ(PetscMalloc1(2,&xadj)); 7355 xadj[0] = 0; 7356 xadj[1] = PetscMax(n_neighs-1,0); 7357 CHKERRQ(PetscMalloc1(xadj[1],&adjncy)); 7358 CHKERRQ(PetscMalloc1(xadj[1],&adjncy_wgt)); 7359 CHKERRQ(PetscCalloc1(n,&count)); 7360 for (i=1;i<n_neighs;i++) 7361 for (j=0;j<n_shared[i];j++) 7362 count[shared[i][j]] += 1; 7363 7364 xadj_count = 0; 7365 for (i=1;i<n_neighs;i++) { 7366 for (j=0;j<n_shared[i];j++) { 7367 if (count[shared[i][j]] < threshold) { 7368 adjncy[xadj_count] = neighs[i]; 7369 adjncy_wgt[xadj_count] = n_shared[i]; 7370 xadj_count++; 7371 break; 7372 } 7373 } 7374 } 7375 xadj[1] = xadj_count; 7376 CHKERRQ(PetscFree(count)); 7377 CHKERRQ(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7378 CHKERRQ(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7379 7380 CHKERRQ(PetscMalloc1(1,&ranks_send_to_idx)); 7381 7382 /* Restrict work on active processes only */ 7383 CHKERRQ(PetscMPIIntCast(im_active,&color)); 7384 if (void_procs) { 7385 CHKERRQ(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7386 CHKERRQ(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7387 CHKERRQ(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7388 subcomm = PetscSubcommChild(psubcomm); 7389 } else { 7390 psubcomm = NULL; 7391 subcomm = PetscObjectComm((PetscObject)mat); 7392 } 7393 7394 v_wgt = NULL; 7395 if (!color) { 7396 CHKERRQ(PetscFree(xadj)); 7397 CHKERRQ(PetscFree(adjncy)); 7398 CHKERRQ(PetscFree(adjncy_wgt)); 7399 } else { 7400 Mat subdomain_adj; 7401 IS new_ranks,new_ranks_contig; 7402 MatPartitioning partitioner; 7403 PetscInt rstart=0,rend=0; 7404 PetscInt *is_indices,*oldranks; 7405 PetscMPIInt size; 7406 PetscBool aggregate; 7407 7408 CHKERRMPI(MPI_Comm_size(subcomm,&size)); 7409 if (void_procs) { 7410 PetscInt prank = rank; 7411 CHKERRQ(PetscMalloc1(size,&oldranks)); 7412 CHKERRMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7413 for (i=0;i<xadj[1];i++) { 7414 CHKERRQ(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7415 } 7416 CHKERRQ(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7417 } else { 7418 oldranks = NULL; 7419 } 7420 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7421 if (aggregate) { /* TODO: all this part could be made more efficient */ 7422 PetscInt lrows,row,ncols,*cols; 7423 PetscMPIInt nrank; 7424 PetscScalar *vals; 7425 7426 CHKERRMPI(MPI_Comm_rank(subcomm,&nrank)); 7427 lrows = 0; 7428 if (nrank<redprocs) { 7429 lrows = size/redprocs; 7430 if (nrank<size%redprocs) lrows++; 7431 } 7432 CHKERRQ(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7433 CHKERRQ(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7434 CHKERRQ(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7435 CHKERRQ(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7436 row = nrank; 7437 ncols = xadj[1]-xadj[0]; 7438 cols = adjncy; 7439 CHKERRQ(PetscMalloc1(ncols,&vals)); 7440 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7441 CHKERRQ(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7442 CHKERRQ(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7443 CHKERRQ(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7444 CHKERRQ(PetscFree(xadj)); 7445 CHKERRQ(PetscFree(adjncy)); 7446 CHKERRQ(PetscFree(adjncy_wgt)); 7447 CHKERRQ(PetscFree(vals)); 7448 if (use_vwgt) { 7449 Vec v; 7450 const PetscScalar *array; 7451 PetscInt nl; 7452 7453 CHKERRQ(MatCreateVecs(subdomain_adj,&v,NULL)); 7454 CHKERRQ(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7455 CHKERRQ(VecAssemblyBegin(v)); 7456 CHKERRQ(VecAssemblyEnd(v)); 7457 CHKERRQ(VecGetLocalSize(v,&nl)); 7458 CHKERRQ(VecGetArrayRead(v,&array)); 7459 CHKERRQ(PetscMalloc1(nl,&v_wgt)); 7460 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7461 CHKERRQ(VecRestoreArrayRead(v,&array)); 7462 CHKERRQ(VecDestroy(&v)); 7463 } 7464 } else { 7465 CHKERRQ(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7466 if (use_vwgt) { 7467 CHKERRQ(PetscMalloc1(1,&v_wgt)); 7468 v_wgt[0] = n; 7469 } 7470 } 7471 /* CHKERRQ(MatView(subdomain_adj,0)); */ 7472 7473 /* Partition */ 7474 CHKERRQ(MatPartitioningCreate(subcomm,&partitioner)); 7475 #if defined(PETSC_HAVE_PTSCOTCH) 7476 CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7477 #elif defined(PETSC_HAVE_PARMETIS) 7478 CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7479 #else 7480 CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7481 #endif 7482 CHKERRQ(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7483 if (v_wgt) { 7484 CHKERRQ(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7485 } 7486 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7487 CHKERRQ(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7488 CHKERRQ(MatPartitioningSetFromOptions(partitioner)); 7489 CHKERRQ(MatPartitioningApply(partitioner,&new_ranks)); 7490 /* CHKERRQ(MatPartitioningView(partitioner,0)); */ 7491 7492 /* renumber new_ranks to avoid "holes" in new set of processors */ 7493 CHKERRQ(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7494 CHKERRQ(ISDestroy(&new_ranks)); 7495 CHKERRQ(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7496 if (!aggregate) { 7497 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7498 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7499 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7500 } else if (oldranks) { 7501 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7502 } else { 7503 ranks_send_to_idx[0] = is_indices[0]; 7504 } 7505 } else { 7506 PetscInt idx = 0; 7507 PetscMPIInt tag; 7508 MPI_Request *reqs; 7509 7510 CHKERRQ(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7511 CHKERRQ(PetscMalloc1(rend-rstart,&reqs)); 7512 for (i=rstart;i<rend;i++) { 7513 CHKERRMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7514 } 7515 CHKERRMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7516 CHKERRMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7517 CHKERRQ(PetscFree(reqs)); 7518 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7519 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7520 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7521 } else if (oldranks) { 7522 ranks_send_to_idx[0] = oldranks[idx]; 7523 } else { 7524 ranks_send_to_idx[0] = idx; 7525 } 7526 } 7527 CHKERRQ(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7528 /* clean up */ 7529 CHKERRQ(PetscFree(oldranks)); 7530 CHKERRQ(ISDestroy(&new_ranks_contig)); 7531 CHKERRQ(MatDestroy(&subdomain_adj)); 7532 CHKERRQ(MatPartitioningDestroy(&partitioner)); 7533 } 7534 CHKERRQ(PetscSubcommDestroy(&psubcomm)); 7535 CHKERRQ(PetscFree(procs_candidates)); 7536 7537 /* assemble parallel IS for sends */ 7538 i = 1; 7539 if (!color) i=0; 7540 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7541 PetscFunctionReturn(0); 7542 } 7543 7544 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7545 7546 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7547 { 7548 Mat local_mat; 7549 IS is_sends_internal; 7550 PetscInt rows,cols,new_local_rows; 7551 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7552 PetscBool ismatis,isdense,newisdense,destroy_mat; 7553 ISLocalToGlobalMapping l2gmap; 7554 PetscInt* l2gmap_indices; 7555 const PetscInt* is_indices; 7556 MatType new_local_type; 7557 /* buffers */ 7558 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7559 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7560 PetscInt *recv_buffer_idxs_local; 7561 PetscScalar *ptr_vals,*recv_buffer_vals; 7562 const PetscScalar *send_buffer_vals; 7563 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7564 /* MPI */ 7565 MPI_Comm comm,comm_n; 7566 PetscSubcomm subcomm; 7567 PetscMPIInt n_sends,n_recvs,size; 7568 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7569 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7570 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7571 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7572 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7573 7574 PetscFunctionBegin; 7575 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7576 CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7577 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7578 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7579 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7580 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7581 PetscValidLogicalCollectiveBool(mat,reuse,6); 7582 PetscValidLogicalCollectiveInt(mat,nis,8); 7583 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7584 if (nvecs) { 7585 PetscCheckFalse(nvecs > 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7586 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7587 } 7588 /* further checks */ 7589 CHKERRQ(MatISGetLocalMat(mat,&local_mat)); 7590 CHKERRQ(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7591 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7592 CHKERRQ(MatGetSize(local_mat,&rows,&cols)); 7593 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7594 if (reuse && *mat_n) { 7595 PetscInt mrows,mcols,mnrows,mncols; 7596 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7597 CHKERRQ(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7598 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7599 CHKERRQ(MatGetSize(mat,&mrows,&mcols)); 7600 CHKERRQ(MatGetSize(*mat_n,&mnrows,&mncols)); 7601 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7602 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7603 } 7604 CHKERRQ(MatGetBlockSize(local_mat,&bs)); 7605 PetscValidLogicalCollectiveInt(mat,bs,1); 7606 7607 /* prepare IS for sending if not provided */ 7608 if (!is_sends) { 7609 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7610 CHKERRQ(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7611 } else { 7612 CHKERRQ(PetscObjectReference((PetscObject)is_sends)); 7613 is_sends_internal = is_sends; 7614 } 7615 7616 /* get comm */ 7617 CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm)); 7618 7619 /* compute number of sends */ 7620 CHKERRQ(ISGetLocalSize(is_sends_internal,&i)); 7621 CHKERRQ(PetscMPIIntCast(i,&n_sends)); 7622 7623 /* compute number of receives */ 7624 CHKERRMPI(MPI_Comm_size(comm,&size)); 7625 CHKERRQ(PetscMalloc1(size,&iflags)); 7626 CHKERRQ(PetscArrayzero(iflags,size)); 7627 CHKERRQ(ISGetIndices(is_sends_internal,&is_indices)); 7628 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7629 CHKERRQ(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7630 CHKERRQ(PetscFree(iflags)); 7631 7632 /* restrict comm if requested */ 7633 subcomm = NULL; 7634 destroy_mat = PETSC_FALSE; 7635 if (restrict_comm) { 7636 PetscMPIInt color,subcommsize; 7637 7638 color = 0; 7639 if (restrict_full) { 7640 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7641 } else { 7642 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7643 } 7644 CHKERRMPI(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7645 subcommsize = size - subcommsize; 7646 /* check if reuse has been requested */ 7647 if (reuse) { 7648 if (*mat_n) { 7649 PetscMPIInt subcommsize2; 7650 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7651 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7652 comm_n = PetscObjectComm((PetscObject)*mat_n); 7653 } else { 7654 comm_n = PETSC_COMM_SELF; 7655 } 7656 } else { /* MAT_INITIAL_MATRIX */ 7657 PetscMPIInt rank; 7658 7659 CHKERRMPI(MPI_Comm_rank(comm,&rank)); 7660 CHKERRQ(PetscSubcommCreate(comm,&subcomm)); 7661 CHKERRQ(PetscSubcommSetNumber(subcomm,2)); 7662 CHKERRQ(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7663 comm_n = PetscSubcommChild(subcomm); 7664 } 7665 /* flag to destroy *mat_n if not significative */ 7666 if (color) destroy_mat = PETSC_TRUE; 7667 } else { 7668 comm_n = comm; 7669 } 7670 7671 /* prepare send/receive buffers */ 7672 CHKERRQ(PetscMalloc1(size,&ilengths_idxs)); 7673 CHKERRQ(PetscArrayzero(ilengths_idxs,size)); 7674 CHKERRQ(PetscMalloc1(size,&ilengths_vals)); 7675 CHKERRQ(PetscArrayzero(ilengths_vals,size)); 7676 if (nis) { 7677 CHKERRQ(PetscCalloc1(size,&ilengths_idxs_is)); 7678 } 7679 7680 /* Get data from local matrices */ 7681 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7682 /* TODO: See below some guidelines on how to prepare the local buffers */ 7683 /* 7684 send_buffer_vals should contain the raw values of the local matrix 7685 send_buffer_idxs should contain: 7686 - MatType_PRIVATE type 7687 - PetscInt size_of_l2gmap 7688 - PetscInt global_row_indices[size_of_l2gmap] 7689 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7690 */ 7691 { 7692 ISLocalToGlobalMapping mapping; 7693 7694 CHKERRQ(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7695 CHKERRQ(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7696 CHKERRQ(ISLocalToGlobalMappingGetSize(mapping,&i)); 7697 CHKERRQ(PetscMalloc1(i+2,&send_buffer_idxs)); 7698 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7699 send_buffer_idxs[1] = i; 7700 CHKERRQ(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7701 CHKERRQ(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7702 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7703 CHKERRQ(PetscMPIIntCast(i,&len)); 7704 for (i=0;i<n_sends;i++) { 7705 ilengths_vals[is_indices[i]] = len*len; 7706 ilengths_idxs[is_indices[i]] = len+2; 7707 } 7708 } 7709 CHKERRQ(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7710 /* additional is (if any) */ 7711 if (nis) { 7712 PetscMPIInt psum; 7713 PetscInt j; 7714 for (j=0,psum=0;j<nis;j++) { 7715 PetscInt plen; 7716 CHKERRQ(ISGetLocalSize(isarray[j],&plen)); 7717 CHKERRQ(PetscMPIIntCast(plen,&len)); 7718 psum += len+1; /* indices + lenght */ 7719 } 7720 CHKERRQ(PetscMalloc1(psum,&send_buffer_idxs_is)); 7721 for (j=0,psum=0;j<nis;j++) { 7722 PetscInt plen; 7723 const PetscInt *is_array_idxs; 7724 CHKERRQ(ISGetLocalSize(isarray[j],&plen)); 7725 send_buffer_idxs_is[psum] = plen; 7726 CHKERRQ(ISGetIndices(isarray[j],&is_array_idxs)); 7727 CHKERRQ(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7728 CHKERRQ(ISRestoreIndices(isarray[j],&is_array_idxs)); 7729 psum += plen+1; /* indices + lenght */ 7730 } 7731 for (i=0;i<n_sends;i++) { 7732 ilengths_idxs_is[is_indices[i]] = psum; 7733 } 7734 CHKERRQ(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7735 } 7736 CHKERRQ(MatISRestoreLocalMat(mat,&local_mat)); 7737 7738 buf_size_idxs = 0; 7739 buf_size_vals = 0; 7740 buf_size_idxs_is = 0; 7741 buf_size_vecs = 0; 7742 for (i=0;i<n_recvs;i++) { 7743 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7744 buf_size_vals += (PetscInt)olengths_vals[i]; 7745 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7746 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7747 } 7748 CHKERRQ(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7749 CHKERRQ(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7750 CHKERRQ(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7751 CHKERRQ(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7752 7753 /* get new tags for clean communications */ 7754 CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7755 CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7756 CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7757 CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7758 7759 /* allocate for requests */ 7760 CHKERRQ(PetscMalloc1(n_sends,&send_req_idxs)); 7761 CHKERRQ(PetscMalloc1(n_sends,&send_req_vals)); 7762 CHKERRQ(PetscMalloc1(n_sends,&send_req_idxs_is)); 7763 CHKERRQ(PetscMalloc1(n_sends,&send_req_vecs)); 7764 CHKERRQ(PetscMalloc1(n_recvs,&recv_req_idxs)); 7765 CHKERRQ(PetscMalloc1(n_recvs,&recv_req_vals)); 7766 CHKERRQ(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7767 CHKERRQ(PetscMalloc1(n_recvs,&recv_req_vecs)); 7768 7769 /* communications */ 7770 ptr_idxs = recv_buffer_idxs; 7771 ptr_vals = recv_buffer_vals; 7772 ptr_idxs_is = recv_buffer_idxs_is; 7773 ptr_vecs = recv_buffer_vecs; 7774 for (i=0;i<n_recvs;i++) { 7775 source_dest = onodes[i]; 7776 CHKERRMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7777 CHKERRMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7778 ptr_idxs += olengths_idxs[i]; 7779 ptr_vals += olengths_vals[i]; 7780 if (nis) { 7781 source_dest = onodes_is[i]; 7782 CHKERRMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7783 ptr_idxs_is += olengths_idxs_is[i]; 7784 } 7785 if (nvecs) { 7786 source_dest = onodes[i]; 7787 CHKERRMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7788 ptr_vecs += olengths_idxs[i]-2; 7789 } 7790 } 7791 for (i=0;i<n_sends;i++) { 7792 CHKERRQ(PetscMPIIntCast(is_indices[i],&source_dest)); 7793 CHKERRMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7794 CHKERRMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7795 if (nis) { 7796 CHKERRMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7797 } 7798 if (nvecs) { 7799 CHKERRQ(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7800 CHKERRMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7801 } 7802 } 7803 CHKERRQ(ISRestoreIndices(is_sends_internal,&is_indices)); 7804 CHKERRQ(ISDestroy(&is_sends_internal)); 7805 7806 /* assemble new l2g map */ 7807 CHKERRMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7808 ptr_idxs = recv_buffer_idxs; 7809 new_local_rows = 0; 7810 for (i=0;i<n_recvs;i++) { 7811 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7812 ptr_idxs += olengths_idxs[i]; 7813 } 7814 CHKERRQ(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7815 ptr_idxs = recv_buffer_idxs; 7816 new_local_rows = 0; 7817 for (i=0;i<n_recvs;i++) { 7818 CHKERRQ(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7819 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7820 ptr_idxs += olengths_idxs[i]; 7821 } 7822 CHKERRQ(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7823 CHKERRQ(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7824 CHKERRQ(PetscFree(l2gmap_indices)); 7825 7826 /* infer new local matrix type from received local matrices type */ 7827 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7828 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7829 if (n_recvs) { 7830 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7831 ptr_idxs = recv_buffer_idxs; 7832 for (i=0;i<n_recvs;i++) { 7833 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7834 new_local_type_private = MATAIJ_PRIVATE; 7835 break; 7836 } 7837 ptr_idxs += olengths_idxs[i]; 7838 } 7839 switch (new_local_type_private) { 7840 case MATDENSE_PRIVATE: 7841 new_local_type = MATSEQAIJ; 7842 bs = 1; 7843 break; 7844 case MATAIJ_PRIVATE: 7845 new_local_type = MATSEQAIJ; 7846 bs = 1; 7847 break; 7848 case MATBAIJ_PRIVATE: 7849 new_local_type = MATSEQBAIJ; 7850 break; 7851 case MATSBAIJ_PRIVATE: 7852 new_local_type = MATSEQSBAIJ; 7853 break; 7854 default: 7855 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7856 } 7857 } else { /* by default, new_local_type is seqaij */ 7858 new_local_type = MATSEQAIJ; 7859 bs = 1; 7860 } 7861 7862 /* create MATIS object if needed */ 7863 if (!reuse) { 7864 CHKERRQ(MatGetSize(mat,&rows,&cols)); 7865 CHKERRQ(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7866 } else { 7867 /* it also destroys the local matrices */ 7868 if (*mat_n) { 7869 CHKERRQ(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7870 } else { /* this is a fake object */ 7871 CHKERRQ(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7872 } 7873 } 7874 CHKERRQ(MatISGetLocalMat(*mat_n,&local_mat)); 7875 CHKERRQ(MatSetType(local_mat,new_local_type)); 7876 7877 CHKERRMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7878 7879 /* Global to local map of received indices */ 7880 CHKERRQ(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7881 CHKERRQ(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7882 CHKERRQ(ISLocalToGlobalMappingDestroy(&l2gmap)); 7883 7884 /* restore attributes -> type of incoming data and its size */ 7885 buf_size_idxs = 0; 7886 for (i=0;i<n_recvs;i++) { 7887 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7888 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7889 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7890 } 7891 CHKERRQ(PetscFree(recv_buffer_idxs)); 7892 7893 /* set preallocation */ 7894 CHKERRQ(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7895 if (!newisdense) { 7896 PetscInt *new_local_nnz=NULL; 7897 7898 ptr_idxs = recv_buffer_idxs_local; 7899 if (n_recvs) { 7900 CHKERRQ(PetscCalloc1(new_local_rows,&new_local_nnz)); 7901 } 7902 for (i=0;i<n_recvs;i++) { 7903 PetscInt j; 7904 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7905 for (j=0;j<*(ptr_idxs+1);j++) { 7906 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7907 } 7908 } else { 7909 /* TODO */ 7910 } 7911 ptr_idxs += olengths_idxs[i]; 7912 } 7913 if (new_local_nnz) { 7914 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7915 CHKERRQ(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7916 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7917 CHKERRQ(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7918 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7919 CHKERRQ(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7920 } else { 7921 CHKERRQ(MatSetUp(local_mat)); 7922 } 7923 CHKERRQ(PetscFree(new_local_nnz)); 7924 } else { 7925 CHKERRQ(MatSetUp(local_mat)); 7926 } 7927 7928 /* set values */ 7929 ptr_vals = recv_buffer_vals; 7930 ptr_idxs = recv_buffer_idxs_local; 7931 for (i=0;i<n_recvs;i++) { 7932 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7933 CHKERRQ(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7934 CHKERRQ(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7935 CHKERRQ(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7936 CHKERRQ(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7937 CHKERRQ(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7938 } else { 7939 /* TODO */ 7940 } 7941 ptr_idxs += olengths_idxs[i]; 7942 ptr_vals += olengths_vals[i]; 7943 } 7944 CHKERRQ(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7945 CHKERRQ(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7946 CHKERRQ(MatISRestoreLocalMat(*mat_n,&local_mat)); 7947 CHKERRQ(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7948 CHKERRQ(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7949 CHKERRQ(PetscFree(recv_buffer_vals)); 7950 7951 #if 0 7952 if (!restrict_comm) { /* check */ 7953 Vec lvec,rvec; 7954 PetscReal infty_error; 7955 7956 CHKERRQ(MatCreateVecs(mat,&rvec,&lvec)); 7957 CHKERRQ(VecSetRandom(rvec,NULL)); 7958 CHKERRQ(MatMult(mat,rvec,lvec)); 7959 CHKERRQ(VecScale(lvec,-1.0)); 7960 CHKERRQ(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7961 CHKERRQ(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7962 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7963 CHKERRQ(VecDestroy(&rvec)); 7964 CHKERRQ(VecDestroy(&lvec)); 7965 } 7966 #endif 7967 7968 /* assemble new additional is (if any) */ 7969 if (nis) { 7970 PetscInt **temp_idxs,*count_is,j,psum; 7971 7972 CHKERRMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7973 CHKERRQ(PetscCalloc1(nis,&count_is)); 7974 ptr_idxs = recv_buffer_idxs_is; 7975 psum = 0; 7976 for (i=0;i<n_recvs;i++) { 7977 for (j=0;j<nis;j++) { 7978 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7979 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7980 psum += plen; 7981 ptr_idxs += plen+1; /* shift pointer to received data */ 7982 } 7983 } 7984 CHKERRQ(PetscMalloc1(nis,&temp_idxs)); 7985 CHKERRQ(PetscMalloc1(psum,&temp_idxs[0])); 7986 for (i=1;i<nis;i++) { 7987 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7988 } 7989 CHKERRQ(PetscArrayzero(count_is,nis)); 7990 ptr_idxs = recv_buffer_idxs_is; 7991 for (i=0;i<n_recvs;i++) { 7992 for (j=0;j<nis;j++) { 7993 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7994 CHKERRQ(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 7995 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7996 ptr_idxs += plen+1; /* shift pointer to received data */ 7997 } 7998 } 7999 for (i=0;i<nis;i++) { 8000 CHKERRQ(ISDestroy(&isarray[i])); 8001 CHKERRQ(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 8002 CHKERRQ(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 8003 } 8004 CHKERRQ(PetscFree(count_is)); 8005 CHKERRQ(PetscFree(temp_idxs[0])); 8006 CHKERRQ(PetscFree(temp_idxs)); 8007 } 8008 /* free workspace */ 8009 CHKERRQ(PetscFree(recv_buffer_idxs_is)); 8010 CHKERRMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 8011 CHKERRQ(PetscFree(send_buffer_idxs)); 8012 CHKERRMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 8013 if (isdense) { 8014 CHKERRQ(MatISGetLocalMat(mat,&local_mat)); 8015 CHKERRQ(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 8016 CHKERRQ(MatISRestoreLocalMat(mat,&local_mat)); 8017 } else { 8018 /* CHKERRQ(PetscFree(send_buffer_vals)); */ 8019 } 8020 if (nis) { 8021 CHKERRMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 8022 CHKERRQ(PetscFree(send_buffer_idxs_is)); 8023 } 8024 8025 if (nvecs) { 8026 CHKERRMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 8027 CHKERRMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 8028 CHKERRQ(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8029 CHKERRQ(VecDestroy(&nnsp_vec[0])); 8030 CHKERRQ(VecCreate(comm_n,&nnsp_vec[0])); 8031 CHKERRQ(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 8032 CHKERRQ(VecSetType(nnsp_vec[0],VECSTANDARD)); 8033 /* set values */ 8034 ptr_vals = recv_buffer_vecs; 8035 ptr_idxs = recv_buffer_idxs_local; 8036 CHKERRQ(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8037 for (i=0;i<n_recvs;i++) { 8038 PetscInt j; 8039 for (j=0;j<*(ptr_idxs+1);j++) { 8040 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8041 } 8042 ptr_idxs += olengths_idxs[i]; 8043 ptr_vals += olengths_idxs[i]-2; 8044 } 8045 CHKERRQ(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8046 CHKERRQ(VecAssemblyBegin(nnsp_vec[0])); 8047 CHKERRQ(VecAssemblyEnd(nnsp_vec[0])); 8048 } 8049 8050 CHKERRQ(PetscFree(recv_buffer_vecs)); 8051 CHKERRQ(PetscFree(recv_buffer_idxs_local)); 8052 CHKERRQ(PetscFree(recv_req_idxs)); 8053 CHKERRQ(PetscFree(recv_req_vals)); 8054 CHKERRQ(PetscFree(recv_req_vecs)); 8055 CHKERRQ(PetscFree(recv_req_idxs_is)); 8056 CHKERRQ(PetscFree(send_req_idxs)); 8057 CHKERRQ(PetscFree(send_req_vals)); 8058 CHKERRQ(PetscFree(send_req_vecs)); 8059 CHKERRQ(PetscFree(send_req_idxs_is)); 8060 CHKERRQ(PetscFree(ilengths_vals)); 8061 CHKERRQ(PetscFree(ilengths_idxs)); 8062 CHKERRQ(PetscFree(olengths_vals)); 8063 CHKERRQ(PetscFree(olengths_idxs)); 8064 CHKERRQ(PetscFree(onodes)); 8065 if (nis) { 8066 CHKERRQ(PetscFree(ilengths_idxs_is)); 8067 CHKERRQ(PetscFree(olengths_idxs_is)); 8068 CHKERRQ(PetscFree(onodes_is)); 8069 } 8070 CHKERRQ(PetscSubcommDestroy(&subcomm)); 8071 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8072 CHKERRQ(MatDestroy(mat_n)); 8073 for (i=0;i<nis;i++) { 8074 CHKERRQ(ISDestroy(&isarray[i])); 8075 } 8076 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8077 CHKERRQ(VecDestroy(&nnsp_vec[0])); 8078 } 8079 *mat_n = NULL; 8080 } 8081 PetscFunctionReturn(0); 8082 } 8083 8084 /* temporary hack into ksp private data structure */ 8085 #include <petsc/private/kspimpl.h> 8086 8087 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8088 { 8089 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8090 PC_IS *pcis = (PC_IS*)pc->data; 8091 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8092 Mat coarsedivudotp = NULL; 8093 Mat coarseG,t_coarse_mat_is; 8094 MatNullSpace CoarseNullSpace = NULL; 8095 ISLocalToGlobalMapping coarse_islg; 8096 IS coarse_is,*isarray,corners; 8097 PetscInt i,im_active=-1,active_procs=-1; 8098 PetscInt nis,nisdofs,nisneu,nisvert; 8099 PetscInt coarse_eqs_per_proc; 8100 PC pc_temp; 8101 PCType coarse_pc_type; 8102 KSPType coarse_ksp_type; 8103 PetscBool multilevel_requested,multilevel_allowed; 8104 PetscBool coarse_reuse; 8105 PetscInt ncoarse,nedcfield; 8106 PetscBool compute_vecs = PETSC_FALSE; 8107 PetscScalar *array; 8108 MatReuse coarse_mat_reuse; 8109 PetscBool restr, full_restr, have_void; 8110 PetscMPIInt size; 8111 PetscErrorCode ierr; 8112 8113 PetscFunctionBegin; 8114 CHKERRQ(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8115 /* Assign global numbering to coarse dofs */ 8116 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8117 PetscInt ocoarse_size; 8118 compute_vecs = PETSC_TRUE; 8119 8120 pcbddc->new_primal_space = PETSC_TRUE; 8121 ocoarse_size = pcbddc->coarse_size; 8122 CHKERRQ(PetscFree(pcbddc->global_primal_indices)); 8123 CHKERRQ(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8124 /* see if we can avoid some work */ 8125 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8126 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8127 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8128 CHKERRQ(KSPReset(pcbddc->coarse_ksp)); 8129 coarse_reuse = PETSC_FALSE; 8130 } else { /* we can safely reuse already computed coarse matrix */ 8131 coarse_reuse = PETSC_TRUE; 8132 } 8133 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8134 coarse_reuse = PETSC_FALSE; 8135 } 8136 /* reset any subassembling information */ 8137 if (!coarse_reuse || pcbddc->recompute_topography) { 8138 CHKERRQ(ISDestroy(&pcbddc->coarse_subassembling)); 8139 } 8140 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8141 coarse_reuse = PETSC_TRUE; 8142 } 8143 if (coarse_reuse && pcbddc->coarse_ksp) { 8144 CHKERRQ(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8145 CHKERRQ(PetscObjectReference((PetscObject)coarse_mat)); 8146 coarse_mat_reuse = MAT_REUSE_MATRIX; 8147 } else { 8148 coarse_mat = NULL; 8149 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8150 } 8151 8152 /* creates temporary l2gmap and IS for coarse indexes */ 8153 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8154 CHKERRQ(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8155 8156 /* creates temporary MATIS object for coarse matrix */ 8157 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8158 CHKERRQ(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8159 CHKERRQ(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8160 CHKERRQ(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8161 CHKERRQ(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8162 CHKERRQ(MatDestroy(&coarse_submat_dense)); 8163 8164 /* count "active" (i.e. with positive local size) and "void" processes */ 8165 im_active = !!(pcis->n); 8166 CHKERRMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8167 8168 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8169 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8170 /* full_restr : just use the receivers from the subassembling pattern */ 8171 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8172 coarse_mat_is = NULL; 8173 multilevel_allowed = PETSC_FALSE; 8174 multilevel_requested = PETSC_FALSE; 8175 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8176 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8177 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8178 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8179 if (multilevel_requested) { 8180 ncoarse = active_procs/pcbddc->coarsening_ratio; 8181 restr = PETSC_FALSE; 8182 full_restr = PETSC_FALSE; 8183 } else { 8184 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8185 restr = PETSC_TRUE; 8186 full_restr = PETSC_TRUE; 8187 } 8188 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8189 ncoarse = PetscMax(1,ncoarse); 8190 if (!pcbddc->coarse_subassembling) { 8191 if (pcbddc->coarsening_ratio > 1) { 8192 if (multilevel_requested) { 8193 CHKERRQ(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8194 } else { 8195 CHKERRQ(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8196 } 8197 } else { 8198 PetscMPIInt rank; 8199 8200 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8201 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8202 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8203 } 8204 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8205 PetscInt psum; 8206 if (pcbddc->coarse_ksp) psum = 1; 8207 else psum = 0; 8208 CHKERRMPI(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8209 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8210 } 8211 /* determine if we can go multilevel */ 8212 if (multilevel_requested) { 8213 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8214 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8215 } 8216 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8217 8218 /* dump subassembling pattern */ 8219 if (pcbddc->dbg_flag && multilevel_allowed) { 8220 CHKERRQ(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8221 } 8222 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8223 nedcfield = -1; 8224 corners = NULL; 8225 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8226 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8227 const PetscInt *idxs; 8228 ISLocalToGlobalMapping tmap; 8229 8230 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8231 CHKERRQ(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8232 /* allocate space for temporary storage */ 8233 CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8234 CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8235 /* allocate for IS array */ 8236 nisdofs = pcbddc->n_ISForDofsLocal; 8237 if (pcbddc->nedclocal) { 8238 if (pcbddc->nedfield > -1) { 8239 nedcfield = pcbddc->nedfield; 8240 } else { 8241 nedcfield = 0; 8242 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8243 nisdofs = 1; 8244 } 8245 } 8246 nisneu = !!pcbddc->NeumannBoundariesLocal; 8247 nisvert = 0; /* nisvert is not used */ 8248 nis = nisdofs + nisneu + nisvert; 8249 CHKERRQ(PetscMalloc1(nis,&isarray)); 8250 /* dofs splitting */ 8251 for (i=0;i<nisdofs;i++) { 8252 /* CHKERRQ(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8253 if (nedcfield != i) { 8254 CHKERRQ(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8255 CHKERRQ(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8256 CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8257 CHKERRQ(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8258 } else { 8259 CHKERRQ(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8260 CHKERRQ(ISGetIndices(pcbddc->nedclocal,&idxs)); 8261 CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8262 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8263 CHKERRQ(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8264 } 8265 CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8266 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8267 /* CHKERRQ(ISView(isarray[i],0)); */ 8268 } 8269 /* neumann boundaries */ 8270 if (pcbddc->NeumannBoundariesLocal) { 8271 /* CHKERRQ(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8272 CHKERRQ(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8273 CHKERRQ(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8274 CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8275 CHKERRQ(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8276 CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8277 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8278 /* CHKERRQ(ISView(isarray[nisdofs],0)); */ 8279 } 8280 /* coordinates */ 8281 if (pcbddc->corner_selected) { 8282 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8283 CHKERRQ(ISGetLocalSize(corners,&tsize)); 8284 CHKERRQ(ISGetIndices(corners,&idxs)); 8285 CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8286 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8287 CHKERRQ(ISRestoreIndices(corners,&idxs)); 8288 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8289 CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8290 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8291 } 8292 CHKERRQ(PetscFree(tidxs)); 8293 CHKERRQ(PetscFree(tidxs2)); 8294 CHKERRQ(ISLocalToGlobalMappingDestroy(&tmap)); 8295 } else { 8296 nis = 0; 8297 nisdofs = 0; 8298 nisneu = 0; 8299 nisvert = 0; 8300 isarray = NULL; 8301 } 8302 /* destroy no longer needed map */ 8303 CHKERRQ(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8304 8305 /* subassemble */ 8306 if (multilevel_allowed) { 8307 Vec vp[1]; 8308 PetscInt nvecs = 0; 8309 PetscBool reuse,reuser; 8310 8311 if (coarse_mat) reuse = PETSC_TRUE; 8312 else reuse = PETSC_FALSE; 8313 CHKERRMPI(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8314 vp[0] = NULL; 8315 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8316 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8317 CHKERRQ(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8318 CHKERRQ(VecSetType(vp[0],VECSTANDARD)); 8319 nvecs = 1; 8320 8321 if (pcbddc->divudotp) { 8322 Mat B,loc_divudotp; 8323 Vec v,p; 8324 IS dummy; 8325 PetscInt np; 8326 8327 CHKERRQ(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8328 CHKERRQ(MatGetSize(loc_divudotp,&np,NULL)); 8329 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8330 CHKERRQ(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8331 CHKERRQ(MatCreateVecs(B,&v,&p)); 8332 CHKERRQ(VecSet(p,1.)); 8333 CHKERRQ(MatMultTranspose(B,p,v)); 8334 CHKERRQ(VecDestroy(&p)); 8335 CHKERRQ(MatDestroy(&B)); 8336 CHKERRQ(VecGetArray(vp[0],&array)); 8337 CHKERRQ(VecPlaceArray(pcbddc->vec1_P,array)); 8338 CHKERRQ(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8339 CHKERRQ(VecResetArray(pcbddc->vec1_P)); 8340 CHKERRQ(VecRestoreArray(vp[0],&array)); 8341 CHKERRQ(ISDestroy(&dummy)); 8342 CHKERRQ(VecDestroy(&v)); 8343 } 8344 } 8345 if (reuser) { 8346 CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8347 } else { 8348 CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8349 } 8350 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8351 PetscScalar *arraym; 8352 const PetscScalar *arrayv; 8353 PetscInt nl; 8354 CHKERRQ(VecGetLocalSize(vp[0],&nl)); 8355 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8356 CHKERRQ(MatDenseGetArray(coarsedivudotp,&arraym)); 8357 CHKERRQ(VecGetArrayRead(vp[0],&arrayv)); 8358 CHKERRQ(PetscArraycpy(arraym,arrayv,nl)); 8359 CHKERRQ(VecRestoreArrayRead(vp[0],&arrayv)); 8360 CHKERRQ(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8361 CHKERRQ(VecDestroy(&vp[0])); 8362 } else { 8363 CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8364 } 8365 } else { 8366 CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8367 } 8368 if (coarse_mat_is || coarse_mat) { 8369 if (!multilevel_allowed) { 8370 CHKERRQ(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8371 } else { 8372 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8373 if (coarse_mat_is) { 8374 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8375 CHKERRQ(PetscObjectReference((PetscObject)coarse_mat_is)); 8376 coarse_mat = coarse_mat_is; 8377 } 8378 } 8379 } 8380 CHKERRQ(MatDestroy(&t_coarse_mat_is)); 8381 CHKERRQ(MatDestroy(&coarse_mat_is)); 8382 8383 /* create local to global scatters for coarse problem */ 8384 if (compute_vecs) { 8385 PetscInt lrows; 8386 CHKERRQ(VecDestroy(&pcbddc->coarse_vec)); 8387 if (coarse_mat) { 8388 CHKERRQ(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8389 } else { 8390 lrows = 0; 8391 } 8392 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8393 CHKERRQ(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8394 CHKERRQ(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8395 CHKERRQ(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8396 CHKERRQ(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8397 } 8398 CHKERRQ(ISDestroy(&coarse_is)); 8399 8400 /* set defaults for coarse KSP and PC */ 8401 if (multilevel_allowed) { 8402 coarse_ksp_type = KSPRICHARDSON; 8403 coarse_pc_type = PCBDDC; 8404 } else { 8405 coarse_ksp_type = KSPPREONLY; 8406 coarse_pc_type = PCREDUNDANT; 8407 } 8408 8409 /* print some info if requested */ 8410 if (pcbddc->dbg_flag) { 8411 if (!multilevel_allowed) { 8412 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8413 if (multilevel_requested) { 8414 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8415 } else if (pcbddc->max_levels) { 8416 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels)); 8417 } 8418 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8419 } 8420 } 8421 8422 /* communicate coarse discrete gradient */ 8423 coarseG = NULL; 8424 if (pcbddc->nedcG && multilevel_allowed) { 8425 MPI_Comm ccomm; 8426 if (coarse_mat) { 8427 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8428 } else { 8429 ccomm = MPI_COMM_NULL; 8430 } 8431 CHKERRQ(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8432 } 8433 8434 /* create the coarse KSP object only once with defaults */ 8435 if (coarse_mat) { 8436 PetscBool isredundant,isbddc,force,valid; 8437 PetscViewer dbg_viewer = NULL; 8438 8439 if (pcbddc->dbg_flag) { 8440 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8441 CHKERRQ(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8442 } 8443 if (!pcbddc->coarse_ksp) { 8444 char prefix[256],str_level[16]; 8445 size_t len; 8446 8447 CHKERRQ(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8448 CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8449 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8450 CHKERRQ(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8451 CHKERRQ(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8452 CHKERRQ(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8453 CHKERRQ(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8454 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8455 /* TODO is this logic correct? should check for coarse_mat type */ 8456 CHKERRQ(PCSetType(pc_temp,coarse_pc_type)); 8457 /* prefix */ 8458 CHKERRQ(PetscStrcpy(prefix,"")); 8459 CHKERRQ(PetscStrcpy(str_level,"")); 8460 if (!pcbddc->current_level) { 8461 CHKERRQ(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8462 CHKERRQ(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8463 } else { 8464 CHKERRQ(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8465 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8466 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8467 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8468 CHKERRQ(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8469 CHKERRQ(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8470 CHKERRQ(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8471 } 8472 CHKERRQ(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8473 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8474 CHKERRQ(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8475 CHKERRQ(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8476 CHKERRQ(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8477 /* allow user customization */ 8478 CHKERRQ(KSPSetFromOptions(pcbddc->coarse_ksp)); 8479 /* get some info after set from options */ 8480 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8481 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8482 force = PETSC_FALSE; 8483 CHKERRQ(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8484 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8485 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8486 if (multilevel_allowed && !force && !valid) { 8487 isbddc = PETSC_TRUE; 8488 CHKERRQ(PCSetType(pc_temp,PCBDDC)); 8489 CHKERRQ(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8490 CHKERRQ(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8491 CHKERRQ(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8492 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8493 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8494 CHKERRQ((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8495 CHKERRQ(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8496 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8497 pc_temp->setfromoptionscalled++; 8498 } 8499 } 8500 } 8501 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8502 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8503 if (nisdofs) { 8504 CHKERRQ(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8505 for (i=0;i<nisdofs;i++) { 8506 CHKERRQ(ISDestroy(&isarray[i])); 8507 } 8508 } 8509 if (nisneu) { 8510 CHKERRQ(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8511 CHKERRQ(ISDestroy(&isarray[nisdofs])); 8512 } 8513 if (nisvert) { 8514 CHKERRQ(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8515 CHKERRQ(ISDestroy(&isarray[nis-1])); 8516 } 8517 if (coarseG) { 8518 CHKERRQ(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8519 } 8520 8521 /* get some info after set from options */ 8522 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8523 8524 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8525 if (isbddc && !multilevel_allowed) { 8526 CHKERRQ(PCSetType(pc_temp,coarse_pc_type)); 8527 } 8528 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8529 force = PETSC_FALSE; 8530 CHKERRQ(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8531 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8532 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8533 CHKERRQ(PCSetType(pc_temp,PCBDDC)); 8534 } 8535 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8536 if (isredundant) { 8537 KSP inner_ksp; 8538 PC inner_pc; 8539 8540 CHKERRQ(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8541 CHKERRQ(KSPGetPC(inner_ksp,&inner_pc)); 8542 } 8543 8544 /* parameters which miss an API */ 8545 CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8546 if (isbddc) { 8547 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8548 8549 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8550 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8551 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8552 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8553 if (pcbddc_coarse->benign_saddle_point) { 8554 Mat coarsedivudotp_is; 8555 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8556 IS row,col; 8557 const PetscInt *gidxs; 8558 PetscInt n,st,M,N; 8559 8560 CHKERRQ(MatGetSize(coarsedivudotp,&n,NULL)); 8561 CHKERRMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8562 st = st-n; 8563 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8564 CHKERRQ(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8565 CHKERRQ(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8566 CHKERRQ(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8567 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8568 CHKERRQ(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8569 CHKERRQ(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8570 CHKERRQ(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8571 CHKERRQ(ISGetSize(row,&M)); 8572 CHKERRQ(MatGetSize(coarse_mat,&N,NULL)); 8573 CHKERRQ(ISDestroy(&row)); 8574 CHKERRQ(ISDestroy(&col)); 8575 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8576 CHKERRQ(MatSetType(coarsedivudotp_is,MATIS)); 8577 CHKERRQ(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8578 CHKERRQ(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8579 CHKERRQ(ISLocalToGlobalMappingDestroy(&rl2g)); 8580 CHKERRQ(ISLocalToGlobalMappingDestroy(&cl2g)); 8581 CHKERRQ(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8582 CHKERRQ(MatDestroy(&coarsedivudotp)); 8583 CHKERRQ(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8584 CHKERRQ(MatDestroy(&coarsedivudotp_is)); 8585 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8586 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8587 } 8588 } 8589 8590 /* propagate symmetry info of coarse matrix */ 8591 CHKERRQ(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8592 if (pc->pmat->symmetric_set) { 8593 CHKERRQ(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8594 } 8595 if (pc->pmat->hermitian_set) { 8596 CHKERRQ(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8597 } 8598 if (pc->pmat->spd_set) { 8599 CHKERRQ(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8600 } 8601 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8602 CHKERRQ(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8603 } 8604 /* set operators */ 8605 CHKERRQ(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8606 CHKERRQ(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8607 CHKERRQ(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8608 if (pcbddc->dbg_flag) { 8609 CHKERRQ(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8610 } 8611 } 8612 CHKERRQ(MatDestroy(&coarseG)); 8613 CHKERRQ(PetscFree(isarray)); 8614 #if 0 8615 { 8616 PetscViewer viewer; 8617 char filename[256]; 8618 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8619 CHKERRQ(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8620 CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8621 CHKERRQ(MatView(coarse_mat,viewer)); 8622 CHKERRQ(PetscViewerPopFormat(viewer)); 8623 CHKERRQ(PetscViewerDestroy(&viewer)); 8624 } 8625 #endif 8626 8627 if (corners) { 8628 Vec gv; 8629 IS is; 8630 const PetscInt *idxs; 8631 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8632 PetscScalar *coords; 8633 8634 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8635 CHKERRQ(VecGetSize(pcbddc->coarse_vec,&N)); 8636 CHKERRQ(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8637 CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8638 CHKERRQ(VecSetBlockSize(gv,cdim)); 8639 CHKERRQ(VecSetSizes(gv,n*cdim,N*cdim)); 8640 CHKERRQ(VecSetType(gv,VECSTANDARD)); 8641 CHKERRQ(VecSetFromOptions(gv)); 8642 CHKERRQ(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8643 8644 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8645 CHKERRQ(ISGetLocalSize(is,&n)); 8646 CHKERRQ(ISGetIndices(is,&idxs)); 8647 CHKERRQ(PetscMalloc1(n*cdim,&coords)); 8648 for (i=0;i<n;i++) { 8649 for (d=0;d<cdim;d++) { 8650 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8651 } 8652 } 8653 CHKERRQ(ISRestoreIndices(is,&idxs)); 8654 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8655 8656 CHKERRQ(ISGetLocalSize(corners,&n)); 8657 CHKERRQ(ISGetIndices(corners,&idxs)); 8658 CHKERRQ(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8659 CHKERRQ(ISRestoreIndices(corners,&idxs)); 8660 CHKERRQ(PetscFree(coords)); 8661 CHKERRQ(VecAssemblyBegin(gv)); 8662 CHKERRQ(VecAssemblyEnd(gv)); 8663 CHKERRQ(VecGetArray(gv,&coords)); 8664 if (pcbddc->coarse_ksp) { 8665 PC coarse_pc; 8666 PetscBool isbddc; 8667 8668 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8669 CHKERRQ(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8670 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8671 PetscReal *realcoords; 8672 8673 CHKERRQ(VecGetLocalSize(gv,&n)); 8674 #if defined(PETSC_USE_COMPLEX) 8675 CHKERRQ(PetscMalloc1(n,&realcoords)); 8676 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8677 #else 8678 realcoords = coords; 8679 #endif 8680 CHKERRQ(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8681 #if defined(PETSC_USE_COMPLEX) 8682 CHKERRQ(PetscFree(realcoords)); 8683 #endif 8684 } 8685 } 8686 CHKERRQ(VecRestoreArray(gv,&coords)); 8687 CHKERRQ(VecDestroy(&gv)); 8688 } 8689 CHKERRQ(ISDestroy(&corners)); 8690 8691 if (pcbddc->coarse_ksp) { 8692 Vec crhs,csol; 8693 8694 CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8695 CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8696 if (!csol) { 8697 CHKERRQ(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8698 } 8699 if (!crhs) { 8700 CHKERRQ(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8701 } 8702 } 8703 CHKERRQ(MatDestroy(&coarsedivudotp)); 8704 8705 /* compute null space for coarse solver if the benign trick has been requested */ 8706 if (pcbddc->benign_null) { 8707 8708 CHKERRQ(VecSet(pcbddc->vec1_P,0.)); 8709 for (i=0;i<pcbddc->benign_n;i++) { 8710 CHKERRQ(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8711 } 8712 CHKERRQ(VecAssemblyBegin(pcbddc->vec1_P)); 8713 CHKERRQ(VecAssemblyEnd(pcbddc->vec1_P)); 8714 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8715 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8716 if (coarse_mat) { 8717 Vec nullv; 8718 PetscScalar *array,*array2; 8719 PetscInt nl; 8720 8721 CHKERRQ(MatCreateVecs(coarse_mat,&nullv,NULL)); 8722 CHKERRQ(VecGetLocalSize(nullv,&nl)); 8723 CHKERRQ(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8724 CHKERRQ(VecGetArray(nullv,&array2)); 8725 CHKERRQ(PetscArraycpy(array2,array,nl)); 8726 CHKERRQ(VecRestoreArray(nullv,&array2)); 8727 CHKERRQ(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8728 CHKERRQ(VecNormalize(nullv,NULL)); 8729 CHKERRQ(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8730 CHKERRQ(VecDestroy(&nullv)); 8731 } 8732 } 8733 CHKERRQ(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8734 8735 CHKERRQ(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8736 if (pcbddc->coarse_ksp) { 8737 PetscBool ispreonly; 8738 8739 if (CoarseNullSpace) { 8740 PetscBool isnull; 8741 CHKERRQ(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8742 if (isnull) { 8743 CHKERRQ(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8744 } 8745 /* TODO: add local nullspaces (if any) */ 8746 } 8747 /* setup coarse ksp */ 8748 CHKERRQ(KSPSetUp(pcbddc->coarse_ksp)); 8749 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8750 CHKERRQ(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8751 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8752 KSP check_ksp; 8753 KSPType check_ksp_type; 8754 PC check_pc; 8755 Vec check_vec,coarse_vec; 8756 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8757 PetscInt its; 8758 PetscBool compute_eigs; 8759 PetscReal *eigs_r,*eigs_c; 8760 PetscInt neigs; 8761 const char *prefix; 8762 8763 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8764 CHKERRQ(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8765 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8766 CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8767 CHKERRQ(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8768 CHKERRQ(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8769 /* prevent from setup unneeded object */ 8770 CHKERRQ(KSPGetPC(check_ksp,&check_pc)); 8771 CHKERRQ(PCSetType(check_pc,PCNONE)); 8772 if (ispreonly) { 8773 check_ksp_type = KSPPREONLY; 8774 compute_eigs = PETSC_FALSE; 8775 } else { 8776 check_ksp_type = KSPGMRES; 8777 compute_eigs = PETSC_TRUE; 8778 } 8779 CHKERRQ(KSPSetType(check_ksp,check_ksp_type)); 8780 CHKERRQ(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8781 CHKERRQ(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8782 CHKERRQ(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8783 CHKERRQ(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8784 CHKERRQ(KSPSetOptionsPrefix(check_ksp,prefix)); 8785 CHKERRQ(KSPAppendOptionsPrefix(check_ksp,"check_")); 8786 CHKERRQ(KSPSetFromOptions(check_ksp)); 8787 CHKERRQ(KSPSetUp(check_ksp)); 8788 CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8789 CHKERRQ(KSPSetPC(check_ksp,check_pc)); 8790 /* create random vec */ 8791 CHKERRQ(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8792 CHKERRQ(VecSetRandom(check_vec,NULL)); 8793 CHKERRQ(MatMult(coarse_mat,check_vec,coarse_vec)); 8794 /* solve coarse problem */ 8795 CHKERRQ(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8796 CHKERRQ(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8797 /* set eigenvalue estimation if preonly has not been requested */ 8798 if (compute_eigs) { 8799 CHKERRQ(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8800 CHKERRQ(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8801 CHKERRQ(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8802 if (neigs) { 8803 lambda_max = eigs_r[neigs-1]; 8804 lambda_min = eigs_r[0]; 8805 if (pcbddc->use_coarse_estimates) { 8806 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8807 CHKERRQ(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8808 CHKERRQ(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8809 } 8810 } 8811 } 8812 } 8813 8814 /* check coarse problem residual error */ 8815 if (pcbddc->dbg_flag) { 8816 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8817 CHKERRQ(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8818 CHKERRQ(VecAXPY(check_vec,-1.0,coarse_vec)); 8819 CHKERRQ(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8820 CHKERRQ(MatMult(coarse_mat,check_vec,coarse_vec)); 8821 CHKERRQ(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8822 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8823 CHKERRQ(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8824 CHKERRQ(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8825 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error)); 8826 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error)); 8827 if (CoarseNullSpace) { 8828 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8829 } 8830 if (compute_eigs) { 8831 PetscReal lambda_max_s,lambda_min_s; 8832 KSPConvergedReason reason; 8833 CHKERRQ(KSPGetType(check_ksp,&check_ksp_type)); 8834 CHKERRQ(KSPGetIterationNumber(check_ksp,&its)); 8835 CHKERRQ(KSPGetConvergedReason(check_ksp,&reason)); 8836 CHKERRQ(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8837 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s)); 8838 for (i=0;i<neigs;i++) { 8839 CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i])); 8840 } 8841 } 8842 CHKERRQ(PetscViewerFlush(dbg_viewer)); 8843 CHKERRQ(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8844 } 8845 CHKERRQ(VecDestroy(&check_vec)); 8846 CHKERRQ(VecDestroy(&coarse_vec)); 8847 CHKERRQ(KSPDestroy(&check_ksp)); 8848 if (compute_eigs) { 8849 CHKERRQ(PetscFree(eigs_r)); 8850 CHKERRQ(PetscFree(eigs_c)); 8851 } 8852 } 8853 } 8854 CHKERRQ(MatNullSpaceDestroy(&CoarseNullSpace)); 8855 /* print additional info */ 8856 if (pcbddc->dbg_flag) { 8857 /* waits until all processes reaches this point */ 8858 CHKERRQ(PetscBarrier((PetscObject)pc)); 8859 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level)); 8860 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8861 } 8862 8863 /* free memory */ 8864 CHKERRQ(MatDestroy(&coarse_mat)); 8865 CHKERRQ(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8866 PetscFunctionReturn(0); 8867 } 8868 8869 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8870 { 8871 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8872 PC_IS* pcis = (PC_IS*)pc->data; 8873 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8874 IS subset,subset_mult,subset_n; 8875 PetscInt local_size,coarse_size=0; 8876 PetscInt *local_primal_indices=NULL; 8877 const PetscInt *t_local_primal_indices; 8878 8879 PetscFunctionBegin; 8880 /* Compute global number of coarse dofs */ 8881 PetscCheckFalse(pcbddc->local_primal_size && !pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8882 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8883 CHKERRQ(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8884 CHKERRQ(ISDestroy(&subset_n)); 8885 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8886 CHKERRQ(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8887 CHKERRQ(ISDestroy(&subset)); 8888 CHKERRQ(ISDestroy(&subset_mult)); 8889 CHKERRQ(ISGetLocalSize(subset_n,&local_size)); 8890 PetscCheckFalse(local_size != pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8891 CHKERRQ(PetscMalloc1(local_size,&local_primal_indices)); 8892 CHKERRQ(ISGetIndices(subset_n,&t_local_primal_indices)); 8893 CHKERRQ(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8894 CHKERRQ(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8895 CHKERRQ(ISDestroy(&subset_n)); 8896 8897 /* check numbering */ 8898 if (pcbddc->dbg_flag) { 8899 PetscScalar coarsesum,*array,*array2; 8900 PetscInt i; 8901 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8902 8903 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8904 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8905 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8906 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8907 /* counter */ 8908 CHKERRQ(VecSet(pcis->vec1_global,0.0)); 8909 CHKERRQ(VecSet(pcis->vec1_N,1.0)); 8910 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8911 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8912 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8913 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8914 CHKERRQ(VecSet(pcis->vec1_N,0.0)); 8915 for (i=0;i<pcbddc->local_primal_size;i++) { 8916 CHKERRQ(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8917 } 8918 CHKERRQ(VecAssemblyBegin(pcis->vec1_N)); 8919 CHKERRQ(VecAssemblyEnd(pcis->vec1_N)); 8920 CHKERRQ(VecSet(pcis->vec1_global,0.0)); 8921 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8922 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8923 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8924 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8925 CHKERRQ(VecGetArray(pcis->vec1_N,&array)); 8926 CHKERRQ(VecGetArray(pcis->vec2_N,&array2)); 8927 for (i=0;i<pcis->n;i++) { 8928 if (array[i] != 0.0 && array[i] != array2[i]) { 8929 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8930 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8931 set_error = PETSC_TRUE; 8932 CHKERRQ(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8933 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh)); 8934 } 8935 } 8936 CHKERRQ(VecRestoreArray(pcis->vec2_N,&array2)); 8937 CHKERRMPI(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8938 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8939 for (i=0;i<pcis->n;i++) { 8940 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8941 } 8942 CHKERRQ(VecRestoreArray(pcis->vec1_N,&array)); 8943 CHKERRQ(VecSet(pcis->vec1_global,0.0)); 8944 CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8945 CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8946 CHKERRQ(VecSum(pcis->vec1_global,&coarsesum)); 8947 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum))); 8948 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8949 PetscInt *gidxs; 8950 8951 CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8952 CHKERRQ(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8953 CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8954 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8955 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8956 for (i=0;i<pcbddc->local_primal_size;i++) { 8957 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8958 } 8959 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8960 CHKERRQ(PetscFree(gidxs)); 8961 } 8962 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 8963 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8964 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8965 } 8966 8967 /* get back data */ 8968 *coarse_size_n = coarse_size; 8969 *local_primal_indices_n = local_primal_indices; 8970 PetscFunctionReturn(0); 8971 } 8972 8973 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8974 { 8975 IS localis_t; 8976 PetscInt i,lsize,*idxs,n; 8977 PetscScalar *vals; 8978 8979 PetscFunctionBegin; 8980 /* get indices in local ordering exploiting local to global map */ 8981 CHKERRQ(ISGetLocalSize(globalis,&lsize)); 8982 CHKERRQ(PetscMalloc1(lsize,&vals)); 8983 for (i=0;i<lsize;i++) vals[i] = 1.0; 8984 CHKERRQ(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8985 CHKERRQ(VecSet(gwork,0.0)); 8986 CHKERRQ(VecSet(lwork,0.0)); 8987 if (idxs) { /* multilevel guard */ 8988 CHKERRQ(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8989 CHKERRQ(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8990 } 8991 CHKERRQ(VecAssemblyBegin(gwork)); 8992 CHKERRQ(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8993 CHKERRQ(PetscFree(vals)); 8994 CHKERRQ(VecAssemblyEnd(gwork)); 8995 /* now compute set in local ordering */ 8996 CHKERRQ(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8997 CHKERRQ(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8998 CHKERRQ(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 8999 CHKERRQ(VecGetSize(lwork,&n)); 9000 for (i=0,lsize=0;i<n;i++) { 9001 if (PetscRealPart(vals[i]) > 0.5) { 9002 lsize++; 9003 } 9004 } 9005 CHKERRQ(PetscMalloc1(lsize,&idxs)); 9006 for (i=0,lsize=0;i<n;i++) { 9007 if (PetscRealPart(vals[i]) > 0.5) { 9008 idxs[lsize++] = i; 9009 } 9010 } 9011 CHKERRQ(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 9012 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 9013 *localis = localis_t; 9014 PetscFunctionReturn(0); 9015 } 9016 9017 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9018 { 9019 PC_IS *pcis=(PC_IS*)pc->data; 9020 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9021 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9022 Mat S_j; 9023 PetscInt *used_xadj,*used_adjncy; 9024 PetscBool free_used_adj; 9025 PetscErrorCode ierr; 9026 9027 PetscFunctionBegin; 9028 CHKERRQ(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9029 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9030 free_used_adj = PETSC_FALSE; 9031 if (pcbddc->sub_schurs_layers == -1) { 9032 used_xadj = NULL; 9033 used_adjncy = NULL; 9034 } else { 9035 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9036 used_xadj = pcbddc->mat_graph->xadj; 9037 used_adjncy = pcbddc->mat_graph->adjncy; 9038 } else if (pcbddc->computed_rowadj) { 9039 used_xadj = pcbddc->mat_graph->xadj; 9040 used_adjncy = pcbddc->mat_graph->adjncy; 9041 } else { 9042 PetscBool flg_row=PETSC_FALSE; 9043 const PetscInt *xadj,*adjncy; 9044 PetscInt nvtxs; 9045 9046 CHKERRQ(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9047 if (flg_row) { 9048 CHKERRQ(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9049 CHKERRQ(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9050 CHKERRQ(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9051 free_used_adj = PETSC_TRUE; 9052 } else { 9053 pcbddc->sub_schurs_layers = -1; 9054 used_xadj = NULL; 9055 used_adjncy = NULL; 9056 } 9057 CHKERRQ(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9058 } 9059 } 9060 9061 /* setup sub_schurs data */ 9062 CHKERRQ(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9063 if (!sub_schurs->schur_explicit) { 9064 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9065 CHKERRQ(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9066 CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9067 } else { 9068 Mat change = NULL; 9069 Vec scaling = NULL; 9070 IS change_primal = NULL, iP; 9071 PetscInt benign_n; 9072 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9073 PetscBool need_change = PETSC_FALSE; 9074 PetscBool discrete_harmonic = PETSC_FALSE; 9075 9076 if (!pcbddc->use_vertices && reuse_solvers) { 9077 PetscInt n_vertices; 9078 9079 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9080 reuse_solvers = (PetscBool)!n_vertices; 9081 } 9082 if (!pcbddc->benign_change_explicit) { 9083 benign_n = pcbddc->benign_n; 9084 } else { 9085 benign_n = 0; 9086 } 9087 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9088 We need a global reduction to avoid possible deadlocks. 9089 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9090 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9091 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9092 CHKERRMPI(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9093 need_change = (PetscBool)(!need_change); 9094 } 9095 /* If the user defines additional constraints, we import them here. 9096 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9097 if (need_change) { 9098 PC_IS *pcisf; 9099 PC_BDDC *pcbddcf; 9100 PC pcf; 9101 9102 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9103 CHKERRQ(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 9104 CHKERRQ(PCSetOperators(pcf,pc->mat,pc->pmat)); 9105 CHKERRQ(PCSetType(pcf,PCBDDC)); 9106 9107 /* hacks */ 9108 pcisf = (PC_IS*)pcf->data; 9109 pcisf->is_B_local = pcis->is_B_local; 9110 pcisf->vec1_N = pcis->vec1_N; 9111 pcisf->BtoNmap = pcis->BtoNmap; 9112 pcisf->n = pcis->n; 9113 pcisf->n_B = pcis->n_B; 9114 pcbddcf = (PC_BDDC*)pcf->data; 9115 CHKERRQ(PetscFree(pcbddcf->mat_graph)); 9116 pcbddcf->mat_graph = pcbddc->mat_graph; 9117 pcbddcf->use_faces = PETSC_TRUE; 9118 pcbddcf->use_change_of_basis = PETSC_TRUE; 9119 pcbddcf->use_change_on_faces = PETSC_TRUE; 9120 pcbddcf->use_qr_single = PETSC_TRUE; 9121 pcbddcf->fake_change = PETSC_TRUE; 9122 9123 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9124 CHKERRQ(PCBDDCConstraintsSetUp(pcf)); 9125 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9126 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal)); 9127 change = pcbddcf->ConstraintMatrix; 9128 pcbddcf->ConstraintMatrix = NULL; 9129 9130 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9131 CHKERRQ(PetscFree(pcbddcf->sub_schurs)); 9132 CHKERRQ(MatNullSpaceDestroy(&pcbddcf->onearnullspace)); 9133 CHKERRQ(PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult)); 9134 CHKERRQ(PetscFree(pcbddcf->primal_indices_local_idxs)); 9135 CHKERRQ(PetscFree(pcbddcf->onearnullvecs_state)); 9136 CHKERRQ(PetscFree(pcf->data)); 9137 pcf->ops->destroy = NULL; 9138 pcf->ops->reset = NULL; 9139 CHKERRQ(PCDestroy(&pcf)); 9140 } 9141 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9142 9143 CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9144 if (iP) { 9145 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9146 CHKERRQ(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9147 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9148 } 9149 if (discrete_harmonic) { 9150 Mat A; 9151 CHKERRQ(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9152 CHKERRQ(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9153 CHKERRQ(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9154 CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9155 CHKERRQ(MatDestroy(&A)); 9156 } else { 9157 CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9158 } 9159 CHKERRQ(MatDestroy(&change)); 9160 CHKERRQ(ISDestroy(&change_primal)); 9161 } 9162 CHKERRQ(MatDestroy(&S_j)); 9163 9164 /* free adjacency */ 9165 if (free_used_adj) { 9166 CHKERRQ(PetscFree2(used_xadj,used_adjncy)); 9167 } 9168 CHKERRQ(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9169 PetscFunctionReturn(0); 9170 } 9171 9172 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9173 { 9174 PC_IS *pcis=(PC_IS*)pc->data; 9175 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9176 PCBDDCGraph graph; 9177 9178 PetscFunctionBegin; 9179 /* attach interface graph for determining subsets */ 9180 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9181 IS verticesIS,verticescomm; 9182 PetscInt vsize,*idxs; 9183 9184 CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9185 CHKERRQ(ISGetSize(verticesIS,&vsize)); 9186 CHKERRQ(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9187 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9188 CHKERRQ(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9189 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9190 CHKERRQ(PCBDDCGraphCreate(&graph)); 9191 CHKERRQ(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9192 CHKERRQ(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9193 CHKERRQ(ISDestroy(&verticescomm)); 9194 CHKERRQ(PCBDDCGraphComputeConnectedComponents(graph)); 9195 } else { 9196 graph = pcbddc->mat_graph; 9197 } 9198 /* print some info */ 9199 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9200 IS vertices; 9201 PetscInt nv,nedges,nfaces; 9202 CHKERRQ(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9203 CHKERRQ(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9204 CHKERRQ(ISGetSize(vertices,&nv)); 9205 CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9206 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9207 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices)); 9208 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges)); 9209 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces)); 9210 CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer)); 9211 CHKERRQ(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9212 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9213 } 9214 9215 /* sub_schurs init */ 9216 if (!pcbddc->sub_schurs) { 9217 CHKERRQ(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9218 } 9219 CHKERRQ(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild)); 9220 9221 /* free graph struct */ 9222 if (pcbddc->sub_schurs_rebuild) { 9223 CHKERRQ(PCBDDCGraphDestroy(&graph)); 9224 } 9225 PetscFunctionReturn(0); 9226 } 9227 9228 PetscErrorCode PCBDDCCheckOperator(PC pc) 9229 { 9230 PC_IS *pcis=(PC_IS*)pc->data; 9231 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9232 9233 PetscFunctionBegin; 9234 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9235 IS zerodiag = NULL; 9236 Mat S_j,B0_B=NULL; 9237 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9238 PetscScalar *p0_check,*array,*array2; 9239 PetscReal norm; 9240 PetscInt i; 9241 9242 /* B0 and B0_B */ 9243 if (zerodiag) { 9244 IS dummy; 9245 9246 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9247 CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9248 CHKERRQ(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9249 CHKERRQ(ISDestroy(&dummy)); 9250 } 9251 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9252 CHKERRQ(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9253 CHKERRQ(VecSet(pcbddc->vec1_P,1.0)); 9254 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9255 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9256 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9257 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9258 CHKERRQ(VecReciprocal(vec_scale_P)); 9259 /* S_j */ 9260 CHKERRQ(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9261 CHKERRQ(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9262 9263 /* mimic vector in \widetilde{W}_\Gamma */ 9264 CHKERRQ(VecSetRandom(pcis->vec1_N,NULL)); 9265 /* continuous in primal space */ 9266 CHKERRQ(VecSetRandom(pcbddc->coarse_vec,NULL)); 9267 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9268 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9269 CHKERRQ(VecGetArray(pcbddc->vec1_P,&array)); 9270 CHKERRQ(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9271 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9272 CHKERRQ(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9273 CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array)); 9274 CHKERRQ(VecAssemblyBegin(pcis->vec1_N)); 9275 CHKERRQ(VecAssemblyEnd(pcis->vec1_N)); 9276 CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9277 CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9278 CHKERRQ(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9279 CHKERRQ(VecCopy(pcis->vec2_B,vec_check_B)); 9280 9281 /* assemble rhs for coarse problem */ 9282 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9283 /* local with Schur */ 9284 CHKERRQ(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9285 if (zerodiag) { 9286 CHKERRQ(VecGetArray(dummy_vec,&array)); 9287 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9288 CHKERRQ(VecRestoreArray(dummy_vec,&array)); 9289 CHKERRQ(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9290 } 9291 /* sum on primal nodes the local contributions */ 9292 CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9293 CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9294 CHKERRQ(VecGetArray(pcis->vec1_N,&array)); 9295 CHKERRQ(VecGetArray(pcbddc->vec1_P,&array2)); 9296 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9297 CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array2)); 9298 CHKERRQ(VecRestoreArray(pcis->vec1_N,&array)); 9299 CHKERRQ(VecSet(pcbddc->coarse_vec,0.)); 9300 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9301 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9302 CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9303 CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9304 CHKERRQ(VecGetArray(pcbddc->vec1_P,&array)); 9305 /* scale primal nodes (BDDC sums contibutions) */ 9306 CHKERRQ(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9307 CHKERRQ(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9308 CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array)); 9309 CHKERRQ(VecAssemblyBegin(pcis->vec1_N)); 9310 CHKERRQ(VecAssemblyEnd(pcis->vec1_N)); 9311 CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9312 CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9313 /* global: \widetilde{B0}_B w_\Gamma */ 9314 if (zerodiag) { 9315 CHKERRQ(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9316 CHKERRQ(VecGetArray(dummy_vec,&array)); 9317 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9318 CHKERRQ(VecRestoreArray(dummy_vec,&array)); 9319 } 9320 /* BDDC */ 9321 CHKERRQ(VecSet(pcis->vec1_D,0.)); 9322 CHKERRQ(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9323 9324 CHKERRQ(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9325 CHKERRQ(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9326 CHKERRQ(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9327 CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm)); 9328 for (i=0;i<pcbddc->benign_n;i++) { 9329 CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9330 } 9331 CHKERRQ(PetscFree(p0_check)); 9332 CHKERRQ(VecDestroy(&vec_scale_P)); 9333 CHKERRQ(VecDestroy(&vec_check_B)); 9334 CHKERRQ(VecDestroy(&dummy_vec)); 9335 CHKERRQ(MatDestroy(&S_j)); 9336 CHKERRQ(MatDestroy(&B0_B)); 9337 } 9338 PetscFunctionReturn(0); 9339 } 9340 9341 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9342 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9343 { 9344 Mat At; 9345 IS rows; 9346 PetscInt rst,ren; 9347 PetscLayout rmap; 9348 9349 PetscFunctionBegin; 9350 rst = ren = 0; 9351 if (ccomm != MPI_COMM_NULL) { 9352 CHKERRQ(PetscLayoutCreate(ccomm,&rmap)); 9353 CHKERRQ(PetscLayoutSetSize(rmap,A->rmap->N)); 9354 CHKERRQ(PetscLayoutSetBlockSize(rmap,1)); 9355 CHKERRQ(PetscLayoutSetUp(rmap)); 9356 CHKERRQ(PetscLayoutGetRange(rmap,&rst,&ren)); 9357 } 9358 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9359 CHKERRQ(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9360 CHKERRQ(ISDestroy(&rows)); 9361 9362 if (ccomm != MPI_COMM_NULL) { 9363 Mat_MPIAIJ *a,*b; 9364 IS from,to; 9365 Vec gvec; 9366 PetscInt lsize; 9367 9368 CHKERRQ(MatCreate(ccomm,B)); 9369 CHKERRQ(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9370 CHKERRQ(MatSetType(*B,MATAIJ)); 9371 CHKERRQ(PetscLayoutDestroy(&((*B)->rmap))); 9372 CHKERRQ(PetscLayoutSetUp((*B)->cmap)); 9373 a = (Mat_MPIAIJ*)At->data; 9374 b = (Mat_MPIAIJ*)(*B)->data; 9375 CHKERRMPI(MPI_Comm_size(ccomm,&b->size)); 9376 CHKERRMPI(MPI_Comm_rank(ccomm,&b->rank)); 9377 CHKERRQ(PetscObjectReference((PetscObject)a->A)); 9378 CHKERRQ(PetscObjectReference((PetscObject)a->B)); 9379 b->A = a->A; 9380 b->B = a->B; 9381 9382 b->donotstash = a->donotstash; 9383 b->roworiented = a->roworiented; 9384 b->rowindices = NULL; 9385 b->rowvalues = NULL; 9386 b->getrowactive = PETSC_FALSE; 9387 9388 (*B)->rmap = rmap; 9389 (*B)->factortype = A->factortype; 9390 (*B)->assembled = PETSC_TRUE; 9391 (*B)->insertmode = NOT_SET_VALUES; 9392 (*B)->preallocated = PETSC_TRUE; 9393 9394 if (a->colmap) { 9395 #if defined(PETSC_USE_CTABLE) 9396 CHKERRQ(PetscTableCreateCopy(a->colmap,&b->colmap)); 9397 #else 9398 CHKERRQ(PetscMalloc1(At->cmap->N,&b->colmap)); 9399 CHKERRQ(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9400 CHKERRQ(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9401 #endif 9402 } else b->colmap = NULL; 9403 if (a->garray) { 9404 PetscInt len; 9405 len = a->B->cmap->n; 9406 CHKERRQ(PetscMalloc1(len+1,&b->garray)); 9407 CHKERRQ(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9408 if (len) CHKERRQ(PetscArraycpy(b->garray,a->garray,len)); 9409 } else b->garray = NULL; 9410 9411 CHKERRQ(PetscObjectReference((PetscObject)a->lvec)); 9412 b->lvec = a->lvec; 9413 CHKERRQ(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9414 9415 /* cannot use VecScatterCopy */ 9416 CHKERRQ(VecGetLocalSize(b->lvec,&lsize)); 9417 CHKERRQ(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9418 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9419 CHKERRQ(MatCreateVecs(*B,&gvec,NULL)); 9420 CHKERRQ(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9421 CHKERRQ(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9422 CHKERRQ(ISDestroy(&from)); 9423 CHKERRQ(ISDestroy(&to)); 9424 CHKERRQ(VecDestroy(&gvec)); 9425 } 9426 CHKERRQ(MatDestroy(&At)); 9427 PetscFunctionReturn(0); 9428 } 9429