1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 if (!maxneighs) { 1528 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1529 *nnsp = NULL; 1530 PetscFunctionReturn(0); 1531 } 1532 maxsize = 0; 1533 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1534 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1535 /* create vectors to hold quadrature weights */ 1536 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1537 if (!transpose) { 1538 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1541 } 1542 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1543 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1544 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1545 for (i=0;i<maxneighs;i++) { 1546 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1547 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1548 } 1549 1550 /* compute local quad vec */ 1551 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1556 } 1557 ierr = VecSet(p,1.);CHKERRQ(ierr); 1558 if (!transpose) { 1559 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1562 } 1563 if (vl2l) { 1564 Mat lA; 1565 VecScatter sc; 1566 1567 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1568 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1569 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1573 } else { 1574 vins = v; 1575 } 1576 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1577 ierr = VecDestroy(&p);CHKERRQ(ierr); 1578 1579 /* insert in global quadrature vecs */ 1580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1581 for (i=0;i<n_neigh;i++) { 1582 const PetscInt *idxs; 1583 PetscInt idx,nn,j; 1584 1585 idxs = shared[i]; 1586 nn = n_shared[i]; 1587 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1588 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1589 idx = -(idx+1); 1590 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1594 if (vl2l) { 1595 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1596 } 1597 ierr = VecDestroy(&v);CHKERRQ(ierr); 1598 ierr = PetscFree(vals);CHKERRQ(ierr); 1599 1600 /* assemble near null space */ 1601 for (i=0;i<maxneighs;i++) { 1602 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<maxneighs;i++) { 1605 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1606 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1607 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1614 { 1615 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 if (primalv) { 1620 if (pcbddc->user_primal_vertices_local) { 1621 IS list[2], newp; 1622 1623 list[0] = primalv; 1624 list[1] = pcbddc->user_primal_vertices_local; 1625 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1626 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1627 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1628 pcbddc->user_primal_vertices_local = newp; 1629 } else { 1630 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1631 } 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 1636 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1637 { 1638 PetscInt f, *comp = (PetscInt *)ctx; 1639 1640 PetscFunctionBegin; 1641 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1646 { 1647 PetscErrorCode ierr; 1648 Vec local,global; 1649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1651 PetscBool monolithic = PETSC_FALSE; 1652 1653 PetscFunctionBegin; 1654 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1655 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1657 /* need to convert from global to local topology information and remove references to information in global ordering */ 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1660 if (monolithic) { /* just get block size to properly compute vertices */ 1661 if (pcbddc->vertex_size == 1) { 1662 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1663 } 1664 goto boundary; 1665 } 1666 1667 if (pcbddc->user_provided_isfordofs) { 1668 if (pcbddc->n_ISForDofs) { 1669 PetscInt i; 1670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1672 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1674 } 1675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1676 pcbddc->n_ISForDofs = 0; 1677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1678 } 1679 } else { 1680 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1681 DM dm; 1682 1683 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1684 if (!dm) { 1685 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1686 } 1687 if (dm) { 1688 IS *fields; 1689 PetscInt nf,i; 1690 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1691 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1692 for (i=0;i<nf;i++) { 1693 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1694 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1695 } 1696 ierr = PetscFree(fields);CHKERRQ(ierr); 1697 pcbddc->n_ISForDofsLocal = nf; 1698 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1699 PetscContainer c; 1700 1701 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1702 if (c) { 1703 MatISLocalFields lf; 1704 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1705 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1706 } else { /* fallback, create the default fields if bs > 1 */ 1707 PetscInt i, n = matis->A->rmap->n; 1708 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1709 if (i > 1) { 1710 pcbddc->n_ISForDofsLocal = i; 1711 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1713 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 } 1718 } else { 1719 PetscInt i; 1720 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1721 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 } 1725 1726 boundary: 1727 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->DirichletBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1734 } else if (pcbddc->NeumannBoundariesLocal) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1736 } 1737 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1738 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1739 } 1740 ierr = VecDestroy(&global);CHKERRQ(ierr); 1741 ierr = VecDestroy(&local);CHKERRQ(ierr); 1742 /* detect local disconnected subdomains if requested (use matis->A) */ 1743 if (pcbddc->detect_disconnected) { 1744 IS primalv = NULL; 1745 PetscInt i; 1746 1747 for (i=0;i<pcbddc->n_local_subs;i++) { 1748 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1751 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1752 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1753 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1754 } 1755 /* early stage corner detection */ 1756 { 1757 DM dm; 1758 1759 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1760 if (dm) { 1761 PetscBool isda; 1762 1763 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1764 if (isda) { 1765 ISLocalToGlobalMapping l2l; 1766 IS corners; 1767 Mat lA; 1768 1769 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1770 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1771 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1772 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1773 if (l2l) { 1774 const PetscInt *idx; 1775 PetscInt bs,*idxout,n; 1776 1777 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1778 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1779 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1780 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1783 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1785 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1786 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1787 pcbddc->corner_selected = PETSC_TRUE; 1788 } else { /* not from DMDA */ 1789 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1795 DM dm; 1796 1797 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1798 if (!dm) { 1799 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1800 } 1801 if (dm) { 1802 Vec vcoords; 1803 PetscSection section; 1804 PetscReal *coords; 1805 PetscInt d,cdim,nl,nf,**ctxs; 1806 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1807 1808 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1809 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1810 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1811 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1812 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1813 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1814 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1816 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1817 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1818 for (d=0;d<cdim;d++) { 1819 PetscInt i; 1820 const PetscScalar *v; 1821 1822 for (i=0;i<nf;i++) ctxs[i][0] = d; 1823 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1824 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1825 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1826 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1827 } 1828 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1829 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1830 ierr = PetscFree(coords);CHKERRQ(ierr); 1831 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1832 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1833 } 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1839 { 1840 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1841 PetscErrorCode ierr; 1842 IS nis; 1843 const PetscInt *idxs; 1844 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1845 PetscBool *ld; 1846 1847 PetscFunctionBegin; 1848 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1849 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1850 if (mop == MPI_LAND) { 1851 /* init rootdata with true */ 1852 ld = (PetscBool*) matis->sf_rootdata; 1853 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1854 } else { 1855 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1856 } 1857 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1858 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1859 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1860 ld = (PetscBool*) matis->sf_leafdata; 1861 for (i=0;i<nd;i++) 1862 if (-1 < idxs[i] && idxs[i] < n) 1863 ld[idxs[i]] = PETSC_TRUE; 1864 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1865 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1866 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1867 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1868 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1869 if (mop == MPI_LAND) { 1870 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1871 } else { 1872 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1873 } 1874 for (i=0,nnd=0;i<n;i++) 1875 if (ld[i]) 1876 nidxs[nnd++] = i; 1877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1878 ierr = ISDestroy(is);CHKERRQ(ierr); 1879 *is = nis; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1884 { 1885 PC_IS *pcis = (PC_IS*)(pc->data); 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!pcbddc->benign_have_null) { 1891 PetscFunctionReturn(0); 1892 } 1893 if (pcbddc->ChangeOfBasisMatrix) { 1894 Vec swap; 1895 1896 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1897 swap = pcbddc->work_change; 1898 pcbddc->work_change = r; 1899 r = swap; 1900 } 1901 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1903 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = VecSet(z,0.);CHKERRQ(ierr); 1905 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1906 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 pcbddc->work_change = r; 1909 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1910 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1916 { 1917 PCBDDCBenignMatMult_ctx ctx; 1918 PetscErrorCode ierr; 1919 PetscBool apply_right,apply_left,reset_x; 1920 1921 PetscFunctionBegin; 1922 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1923 if (transpose) { 1924 apply_right = ctx->apply_left; 1925 apply_left = ctx->apply_right; 1926 } else { 1927 apply_right = ctx->apply_right; 1928 apply_left = ctx->apply_left; 1929 } 1930 reset_x = PETSC_FALSE; 1931 if (apply_right) { 1932 const PetscScalar *ax; 1933 PetscInt nl,i; 1934 1935 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1936 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1938 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1939 for (i=0;i<ctx->benign_n;i++) { 1940 PetscScalar sum,val; 1941 const PetscInt *idxs; 1942 PetscInt nz,j; 1943 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1945 sum = 0.; 1946 if (ctx->apply_p0) { 1947 val = ctx->work[idxs[nz-1]]; 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 ctx->work[idxs[j]] += val; 1951 } 1952 } else { 1953 for (j=0;j<nz-1;j++) { 1954 sum += ctx->work[idxs[j]]; 1955 } 1956 } 1957 ctx->work[idxs[nz-1]] -= sum; 1958 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 } 1960 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1961 reset_x = PETSC_TRUE; 1962 } 1963 if (transpose) { 1964 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1967 } 1968 if (reset_x) { 1969 ierr = VecResetArray(x);CHKERRQ(ierr); 1970 } 1971 if (apply_left) { 1972 PetscScalar *ay; 1973 PetscInt i; 1974 1975 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1976 for (i=0;i<ctx->benign_n;i++) { 1977 PetscScalar sum,val; 1978 const PetscInt *idxs; 1979 PetscInt nz,j; 1980 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 val = -ay[idxs[nz-1]]; 1983 if (ctx->apply_p0) { 1984 sum = 0.; 1985 for (j=0;j<nz-1;j++) { 1986 sum += ay[idxs[j]]; 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] += sum; 1990 } else { 1991 for (j=0;j<nz-1;j++) { 1992 ay[idxs[j]] += val; 1993 } 1994 ay[idxs[nz-1]] = 0.; 1995 } 1996 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2022 { 2023 PC_IS *pcis = (PC_IS*)pc->data; 2024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2025 PCBDDCBenignMatMult_ctx ctx; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (!restore) { 2030 Mat A_IB,A_BI; 2031 PetscScalar *work; 2032 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2033 2034 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2035 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2036 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2037 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2038 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2039 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2040 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2041 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2042 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2043 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2044 ctx->apply_left = PETSC_TRUE; 2045 ctx->apply_right = PETSC_FALSE; 2046 ctx->apply_p0 = PETSC_FALSE; 2047 ctx->benign_n = pcbddc->benign_n; 2048 if (reuse) { 2049 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2050 ctx->free = PETSC_FALSE; 2051 } else { /* TODO: could be optimized for successive solves */ 2052 ISLocalToGlobalMapping N_to_D; 2053 PetscInt i; 2054 2055 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2056 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 for (i=0;i<pcbddc->benign_n;i++) { 2058 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2059 } 2060 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2061 ctx->free = PETSC_TRUE; 2062 } 2063 ctx->A = pcis->A_IB; 2064 ctx->work = work; 2065 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2066 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 pcis->A_IB = A_IB; 2069 2070 /* A_BI as A_IB^T */ 2071 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2072 pcbddc->benign_original_mat = pcis->A_BI; 2073 pcis->A_BI = A_BI; 2074 } else { 2075 if (!pcbddc->benign_original_mat) { 2076 PetscFunctionReturn(0); 2077 } 2078 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2080 pcis->A_IB = ctx->A; 2081 ctx->A = NULL; 2082 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2083 pcis->A_BI = pcbddc->benign_original_mat; 2084 pcbddc->benign_original_mat = NULL; 2085 if (ctx->free) { 2086 PetscInt i; 2087 for (i=0;i<ctx->benign_n;i++) { 2088 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2093 ierr = PetscFree(ctx);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /* used just in bddc debug mode */ 2099 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2100 { 2101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2102 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2103 Mat An; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2108 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2109 if (is1) { 2110 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2111 ierr = MatDestroy(&An);CHKERRQ(ierr); 2112 } else { 2113 *B = An; 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* TODO: add reuse flag */ 2119 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2120 { 2121 Mat Bt; 2122 PetscScalar *a,*bdata; 2123 const PetscInt *ii,*ij; 2124 PetscInt m,n,i,nnz,*bii,*bij; 2125 PetscBool flg_row; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2130 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2131 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2132 nnz = n; 2133 for (i=0;i<ii[n];i++) { 2134 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2135 } 2136 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2138 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2139 nnz = 0; 2140 bii[0] = 0; 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=ii[i];j<ii[i+1];j++) { 2144 PetscScalar entry = a[j]; 2145 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2146 bij[nnz] = ij[j]; 2147 bdata[nnz] = entry; 2148 nnz++; 2149 } 2150 } 2151 bii[i+1] = nnz; 2152 } 2153 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2154 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2155 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2156 { 2157 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2158 b->free_a = PETSC_TRUE; 2159 b->free_ij = PETSC_TRUE; 2160 } 2161 if (*B == A) { 2162 ierr = MatDestroy(&A);CHKERRQ(ierr); 2163 } 2164 *B = Bt; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2169 { 2170 Mat B = NULL; 2171 DM dm; 2172 IS is_dummy,*cc_n; 2173 ISLocalToGlobalMapping l2gmap_dummy; 2174 PCBDDCGraph graph; 2175 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2176 PetscInt i,n; 2177 PetscInt *xadj,*adjncy; 2178 PetscBool isplex = PETSC_FALSE; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (ncc) *ncc = 0; 2183 if (cc) *cc = NULL; 2184 if (primalv) *primalv = NULL; 2185 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2186 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2187 if (!dm) { 2188 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2189 } 2190 if (dm) { 2191 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2192 } 2193 if (isplex) { /* this code has been modified from plexpartition.c */ 2194 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2195 PetscInt *adj = NULL; 2196 IS cellNumbering; 2197 const PetscInt *cellNum; 2198 PetscBool useCone, useClosure; 2199 PetscSection section; 2200 PetscSegBuffer adjBuffer; 2201 PetscSF sfPoint; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2206 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2207 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2208 /* Build adjacency graph via a section/segbuffer */ 2209 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2210 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2211 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2212 /* Always use FVM adjacency to create partitioner graph */ 2213 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2214 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2215 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2217 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2218 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2219 for (n = 0, p = pStart; p < pEnd; p++) { 2220 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2221 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2222 adjSize = PETSC_DETERMINE; 2223 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2224 for (a = 0; a < adjSize; ++a) { 2225 const PetscInt point = adj[a]; 2226 if (pStart <= point && point < pEnd) { 2227 PetscInt *PETSC_RESTRICT pBuf; 2228 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2229 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2230 *pBuf = point; 2231 } 2232 } 2233 n++; 2234 } 2235 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2236 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2237 /* Derive CSR graph from section/segbuffer */ 2238 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2239 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2240 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2241 for (idx = 0, p = pStart; p < pEnd; p++) { 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2244 } 2245 xadj[n] = size; 2246 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2247 /* Clean up */ 2248 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2249 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2250 ierr = PetscFree(adj);CHKERRQ(ierr); 2251 graph->xadj = xadj; 2252 graph->adjncy = adjncy; 2253 } else { 2254 Mat A; 2255 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2256 2257 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2258 if (!A->rmap->N || !A->cmap->N) { 2259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij && filter) { 2264 PetscBool isseqdense; 2265 2266 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2267 if (!isseqdense) { 2268 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2269 } else { /* TODO: rectangular case and LDA */ 2270 PetscScalar *array; 2271 PetscReal chop=1.e-6; 2272 2273 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2274 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2275 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2276 for (i=0;i<n;i++) { 2277 PetscInt j; 2278 for (j=i+1;j<n;j++) { 2279 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2280 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2281 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2282 } 2283 } 2284 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2286 } 2287 } else { 2288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2289 B = A; 2290 } 2291 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2292 2293 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2294 if (filter) { 2295 PetscScalar *data; 2296 PetscInt j,cum; 2297 2298 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2299 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2300 cum = 0; 2301 for (i=0;i<n;i++) { 2302 PetscInt t; 2303 2304 for (j=xadj[i];j<xadj[i+1];j++) { 2305 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2306 continue; 2307 } 2308 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2309 } 2310 t = xadj_filtered[i]; 2311 xadj_filtered[i] = cum; 2312 cum += t; 2313 } 2314 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2315 graph->xadj = xadj_filtered; 2316 graph->adjncy = adjncy_filtered; 2317 } else { 2318 graph->xadj = xadj; 2319 graph->adjncy = adjncy; 2320 } 2321 } 2322 /* compute local connected components using PCBDDCGraph */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2326 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2327 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2328 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2329 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2330 2331 /* partial clean up */ 2332 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2333 if (B) { 2334 PetscBool flg_row; 2335 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2336 ierr = MatDestroy(&B);CHKERRQ(ierr); 2337 } 2338 if (isplex) { 2339 ierr = PetscFree(xadj);CHKERRQ(ierr); 2340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2341 } 2342 2343 /* get back data */ 2344 if (isplex) { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc || primalv) { 2347 Mat A; 2348 PetscBT btv,btvt; 2349 PetscSection subSection; 2350 PetscInt *ids,cum,cump,*cids,*pids; 2351 2352 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2353 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2354 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2355 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2356 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2357 2358 cids[0] = 0; 2359 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2360 PetscInt j; 2361 2362 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2363 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2364 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2365 2366 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2367 for (k = 0; k < 2*size; k += 2) { 2368 PetscInt s, p = closure[k], off, dof, cdof; 2369 2370 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2371 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2372 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2373 for (s = 0; s < dof-cdof; s++) { 2374 if (PetscBTLookupSet(btvt,off+s)) continue; 2375 if (!PetscBTLookup(btv,off+s)) { 2376 ids[cum++] = off+s; 2377 } else { /* cross-vertex */ 2378 pids[cump++] = off+s; 2379 } 2380 } 2381 } 2382 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2383 } 2384 cids[i+1] = cum; 2385 /* mark dofs as already assigned */ 2386 for (j = cids[i]; j < cids[i+1]; j++) { 2387 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2388 } 2389 } 2390 if (cc) { 2391 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2392 for (i = 0; i < graph->ncc; i++) { 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2394 } 2395 *cc = cc_n; 2396 } 2397 if (primalv) { 2398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2401 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2402 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2403 } 2404 } else { 2405 if (ncc) *ncc = graph->ncc; 2406 if (cc) { 2407 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2408 for (i=0;i<graph->ncc;i++) { 2409 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2410 } 2411 *cc = cc_n; 2412 } 2413 } 2414 /* clean up graph */ 2415 graph->xadj = 0; 2416 graph->adjncy = 0; 2417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2422 { 2423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2424 PC_IS* pcis = (PC_IS*)(pc->data); 2425 IS dirIS = NULL; 2426 PetscInt i; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2431 if (zerodiag) { 2432 Mat A; 2433 Vec vec3_N; 2434 PetscScalar *vals; 2435 const PetscInt *idxs; 2436 PetscInt nz,*count; 2437 2438 /* p0 */ 2439 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2441 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2442 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2443 for (i=0;i<nz;i++) vals[i] = 1.; 2444 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2447 /* v_I */ 2448 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) vals[i] = 0.; 2450 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2454 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2456 if (dirIS) { 2457 PetscInt n; 2458 2459 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2460 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) vals[i] = 0.; 2462 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2463 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2464 } 2465 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2467 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2468 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2469 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2470 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2471 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2472 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2473 ierr = PetscFree(vals);CHKERRQ(ierr); 2474 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2475 2476 /* there should not be any pressure dofs lying on the interface */ 2477 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2478 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2480 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2482 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = PetscFree(count);CHKERRQ(ierr); 2485 } 2486 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2487 2488 /* check PCBDDCBenignGetOrSetP0 */ 2489 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2490 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2491 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2493 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2496 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2505 PetscInt nz,n; 2506 PetscInt *interior_dofs,n_interior_dofs,nneu; 2507 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2512 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2513 for (n=0;n<pcbddc->benign_n;n++) { 2514 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2517 pcbddc->benign_n = 0; 2518 2519 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2520 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2521 Checks if all the pressure dofs in each subdomain have a zero diagonal 2522 If not, a change of basis on pressures is not needed 2523 since the local Schur complements are already SPD 2524 */ 2525 has_null_pressures = PETSC_TRUE; 2526 have_null = PETSC_TRUE; 2527 if (pcbddc->n_ISForDofsLocal) { 2528 IS iP = NULL; 2529 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2530 2531 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2532 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2535 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2540 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2541 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2542 if (iP) { 2543 IS newpressures; 2544 2545 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2547 pressures = newpressures; 2548 } 2549 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2550 if (!sorted) { 2551 ierr = ISSort(pressures);CHKERRQ(ierr); 2552 } 2553 } else { 2554 pressures = NULL; 2555 } 2556 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2557 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2558 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2559 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2560 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2561 if (!sorted) { 2562 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2563 } 2564 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2565 zerodiag_save = zerodiag; 2566 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2567 if (!nz) { 2568 if (n) have_null = PETSC_FALSE; 2569 has_null_pressures = PETSC_FALSE; 2570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2571 } 2572 recompute_zerodiag = PETSC_FALSE; 2573 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2574 zerodiag_subs = NULL; 2575 pcbddc->benign_n = 0; 2576 n_interior_dofs = 0; 2577 interior_dofs = NULL; 2578 nneu = 0; 2579 if (pcbddc->NeumannBoundariesLocal) { 2580 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2581 } 2582 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2583 if (checkb) { /* need to compute interior nodes */ 2584 PetscInt n,i,j; 2585 PetscInt n_neigh,*neigh,*n_shared,**shared; 2586 PetscInt *iwork; 2587 2588 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2589 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2592 for (i=1;i<n_neigh;i++) 2593 for (j=0;j<n_shared[i];j++) 2594 iwork[shared[i][j]] += 1; 2595 for (i=0;i<n;i++) 2596 if (!iwork[i]) 2597 interior_dofs[n_interior_dofs++] = i; 2598 ierr = PetscFree(iwork);CHKERRQ(ierr); 2599 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2600 } 2601 if (has_null_pressures) { 2602 IS *subs; 2603 PetscInt nsubs,i,j,nl; 2604 const PetscInt *idxs; 2605 PetscScalar *array; 2606 Vec *work; 2607 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2608 2609 subs = pcbddc->local_subs; 2610 nsubs = pcbddc->n_local_subs; 2611 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2612 if (checkb) { 2613 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2615 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2616 /* work[0] = 1_p */ 2617 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2620 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2621 /* work[0] = 1_v */ 2622 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2623 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2624 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2625 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2627 } 2628 if (nsubs > 1) { 2629 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2630 for (i=0;i<nsubs;i++) { 2631 ISLocalToGlobalMapping l2g; 2632 IS t_zerodiag_subs; 2633 PetscInt nl; 2634 2635 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2637 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2638 if (nl) { 2639 PetscBool valid = PETSC_TRUE; 2640 2641 if (checkb) { 2642 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2644 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2645 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2647 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2649 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2650 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2651 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2652 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2653 for (j=0;j<n_interior_dofs;j++) { 2654 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2655 valid = PETSC_FALSE; 2656 break; 2657 } 2658 } 2659 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2660 } 2661 if (valid && nneu) { 2662 const PetscInt *idxs; 2663 PetscInt nzb; 2664 2665 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2667 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2668 if (nzb) valid = PETSC_FALSE; 2669 } 2670 if (valid && pressures) { 2671 IS t_pressure_subs; 2672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2673 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2674 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2675 } 2676 if (valid) { 2677 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2678 pcbddc->benign_n++; 2679 } else { 2680 recompute_zerodiag = PETSC_TRUE; 2681 } 2682 } 2683 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2685 } 2686 } else { /* there's just one subdomain (or zero if they have not been detected */ 2687 PetscBool valid = PETSC_TRUE; 2688 2689 if (nneu) valid = PETSC_FALSE; 2690 if (valid && pressures) { 2691 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2692 } 2693 if (valid && checkb) { 2694 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2695 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2696 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2697 for (j=0;j<n_interior_dofs;j++) { 2698 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2699 valid = PETSC_FALSE; 2700 break; 2701 } 2702 } 2703 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2704 } 2705 if (valid) { 2706 pcbddc->benign_n = 1; 2707 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2708 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2709 zerodiag_subs[0] = zerodiag; 2710 } 2711 } 2712 if (checkb) { 2713 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2714 } 2715 } 2716 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2717 2718 if (!pcbddc->benign_n) { 2719 PetscInt n; 2720 2721 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2722 recompute_zerodiag = PETSC_FALSE; 2723 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2724 if (n) { 2725 has_null_pressures = PETSC_FALSE; 2726 have_null = PETSC_FALSE; 2727 } 2728 } 2729 2730 /* final check for null pressures */ 2731 if (zerodiag && pressures) { 2732 PetscInt nz,np; 2733 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2734 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2735 if (nz != np) have_null = PETSC_FALSE; 2736 } 2737 2738 if (recompute_zerodiag) { 2739 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2740 if (pcbddc->benign_n == 1) { 2741 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2742 zerodiag = zerodiag_subs[0]; 2743 } else { 2744 PetscInt i,nzn,*new_idxs; 2745 2746 nzn = 0; 2747 for (i=0;i<pcbddc->benign_n;i++) { 2748 PetscInt ns; 2749 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2750 nzn += ns; 2751 } 2752 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2753 nzn = 0; 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscInt ns,*idxs; 2756 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2757 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2760 nzn += ns; 2761 } 2762 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2764 } 2765 have_null = PETSC_FALSE; 2766 } 2767 2768 /* Prepare matrix to compute no-net-flux */ 2769 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2770 Mat A,loc_divudotp; 2771 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2772 IS row,col,isused = NULL; 2773 PetscInt M,N,n,st,n_isused; 2774 2775 if (pressures) { 2776 isused = pressures; 2777 } else { 2778 isused = zerodiag_save; 2779 } 2780 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2781 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2782 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2783 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2784 n_isused = 0; 2785 if (isused) { 2786 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2787 } 2788 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 st = st-n_isused; 2790 if (n) { 2791 const PetscInt *gidxs; 2792 2793 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2794 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2795 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2803 } 2804 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2805 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2808 ierr = ISDestroy(&row);CHKERRQ(ierr); 2809 ierr = ISDestroy(&col);CHKERRQ(ierr); 2810 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2813 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2817 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2818 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 } 2821 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2822 2823 /* change of basis and p0 dofs */ 2824 if (has_null_pressures) { 2825 IS zerodiagc; 2826 const PetscInt *idxs,*idxsc; 2827 PetscInt i,s,*nnz; 2828 2829 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2830 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2831 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2832 /* local change of basis for pressures */ 2833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2834 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2836 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2838 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2839 for (i=0;i<pcbddc->benign_n;i++) { 2840 PetscInt nzs,j; 2841 2842 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2843 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2844 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2845 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2846 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2847 } 2848 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2849 ierr = PetscFree(nnz);CHKERRQ(ierr); 2850 /* set identity on velocities */ 2851 for (i=0;i<n-nz;i++) { 2852 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2853 } 2854 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2855 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2856 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2857 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2858 /* set change on pressures */ 2859 for (s=0;s<pcbddc->benign_n;s++) { 2860 PetscScalar *array; 2861 PetscInt nzs; 2862 2863 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2864 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2865 for (i=0;i<nzs-1;i++) { 2866 PetscScalar vals[2]; 2867 PetscInt cols[2]; 2868 2869 cols[0] = idxs[i]; 2870 cols[1] = idxs[nzs-1]; 2871 vals[0] = 1.; 2872 vals[1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2876 for (i=0;i<nzs-1;i++) array[i] = -1.; 2877 array[nzs-1] = 1.; 2878 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2879 /* store local idxs for p0 */ 2880 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2881 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(array);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 /* project if needed */ 2887 if (pcbddc->benign_change_explicit) { 2888 Mat M; 2889 2890 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2891 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2893 ierr = MatDestroy(&M);CHKERRQ(ierr); 2894 } 2895 /* store global idxs for p0 */ 2896 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2897 } 2898 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2899 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2900 2901 /* determines if the coarse solver will be singular or not */ 2902 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2903 /* determines if the problem has subdomains with 0 pressure block */ 2904 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2905 *zerodiaglocal = zerodiag; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2910 { 2911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2912 PetscScalar *array; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 if (!pcbddc->benign_sf) { 2917 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2918 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2919 } 2920 if (get) { 2921 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2922 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2923 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2924 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2925 } else { 2926 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2927 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2928 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2935 { 2936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 /* TODO: add error checking 2941 - avoid nested pop (or push) calls. 2942 - cannot push before pop. 2943 - cannot call this if pcbddc->local_mat is NULL 2944 */ 2945 if (!pcbddc->benign_n) { 2946 PetscFunctionReturn(0); 2947 } 2948 if (pop) { 2949 if (pcbddc->benign_change_explicit) { 2950 IS is_p0; 2951 MatReuse reuse; 2952 2953 /* extract B_0 */ 2954 reuse = MAT_INITIAL_MATRIX; 2955 if (pcbddc->benign_B0) { 2956 reuse = MAT_REUSE_MATRIX; 2957 } 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2959 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2960 /* remove rows and cols from local problem */ 2961 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2962 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2963 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2964 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2965 } else { 2966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2967 PetscScalar *vals; 2968 PetscInt i,n,*idxs_ins; 2969 2970 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2971 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2972 if (!pcbddc->benign_B0) { 2973 PetscInt *nnz; 2974 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2975 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2976 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2977 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2978 for (i=0;i<pcbddc->benign_n;i++) { 2979 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2980 nnz[i] = n - nnz[i]; 2981 } 2982 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2983 ierr = PetscFree(nnz);CHKERRQ(ierr); 2984 } 2985 2986 for (i=0;i<pcbddc->benign_n;i++) { 2987 PetscScalar *array; 2988 PetscInt *idxs,j,nz,cum; 2989 2990 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2991 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2992 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2993 for (j=0;j<nz;j++) vals[j] = 1.; 2994 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2996 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2997 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2998 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2999 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3000 cum = 0; 3001 for (j=0;j<n;j++) { 3002 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3003 vals[cum] = array[j]; 3004 idxs_ins[cum] = j; 3005 cum++; 3006 } 3007 } 3008 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3010 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3011 } 3012 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3015 } 3016 } else { /* push */ 3017 if (pcbddc->benign_change_explicit) { 3018 PetscInt i; 3019 3020 for (i=0;i<pcbddc->benign_n;i++) { 3021 PetscScalar *B0_vals; 3022 PetscInt *B0_cols,B0_ncol; 3023 3024 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3025 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3026 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3027 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 } else { 3033 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3034 } 3035 } 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3040 { 3041 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3042 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3043 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3044 PetscBLASInt *B_iwork,*B_ifail; 3045 PetscScalar *work,lwork; 3046 PetscScalar *St,*S,*eigv; 3047 PetscScalar *Sarray,*Starray; 3048 PetscReal *eigs,thresh,lthresh,uthresh; 3049 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3050 PetscBool allocated_S_St; 3051 #if defined(PETSC_USE_COMPLEX) 3052 PetscReal *rwork; 3053 #endif 3054 PetscErrorCode ierr; 3055 3056 PetscFunctionBegin; 3057 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3058 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3059 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3060 3061 if (pcbddc->dbg_flag) { 3062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3065 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3066 } 3067 3068 if (pcbddc->dbg_flag) { 3069 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3070 } 3071 3072 /* max size of subsets */ 3073 mss = 0; 3074 for (i=0;i<sub_schurs->n_subs;i++) { 3075 PetscInt subset_size; 3076 3077 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3078 mss = PetscMax(mss,subset_size); 3079 } 3080 3081 /* min/max and threshold */ 3082 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3083 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3084 nmax = PetscMax(nmin,nmax); 3085 allocated_S_St = PETSC_FALSE; 3086 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3087 allocated_S_St = PETSC_TRUE; 3088 } 3089 3090 /* allocate lapack workspace */ 3091 cum = cum2 = 0; 3092 maxneigs = 0; 3093 for (i=0;i<sub_schurs->n_subs;i++) { 3094 PetscInt n,subset_size; 3095 3096 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3097 n = PetscMin(subset_size,nmax); 3098 cum += subset_size; 3099 cum2 += subset_size*n; 3100 maxneigs = PetscMax(maxneigs,n); 3101 } 3102 if (mss) { 3103 if (sub_schurs->is_symmetric) { 3104 PetscBLASInt B_itype = 1; 3105 PetscBLASInt B_N = mss; 3106 PetscReal zero = 0.0; 3107 PetscReal eps = 0.0; /* dlamch? */ 3108 3109 B_lwork = -1; 3110 S = NULL; 3111 St = NULL; 3112 eigs = NULL; 3113 eigv = NULL; 3114 B_iwork = NULL; 3115 B_ifail = NULL; 3116 #if defined(PETSC_USE_COMPLEX) 3117 rwork = NULL; 3118 #endif 3119 thresh = 1.0; 3120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3121 #if defined(PETSC_USE_COMPLEX) 3122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3123 #else 3124 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3125 #endif 3126 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3129 } else { 3130 lwork = 0; 3131 } 3132 3133 nv = 0; 3134 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3135 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3136 } 3137 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3138 if (allocated_S_St) { 3139 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3140 } 3141 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3142 #if defined(PETSC_USE_COMPLEX) 3143 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3144 #endif 3145 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3146 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3147 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3148 nv+cum,&pcbddc->adaptive_constraints_idxs, 3149 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3150 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3151 3152 maxneigs = 0; 3153 cum = cumarray = 0; 3154 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3155 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3156 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3157 const PetscInt *idxs; 3158 3159 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3160 for (cum=0;cum<nv;cum++) { 3161 pcbddc->adaptive_constraints_n[cum] = 1; 3162 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3163 pcbddc->adaptive_constraints_data[cum] = 1.0; 3164 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3165 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3166 } 3167 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3168 } 3169 3170 if (mss) { /* multilevel */ 3171 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3172 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3173 } 3174 3175 lthresh = pcbddc->adaptive_threshold[0]; 3176 uthresh = pcbddc->adaptive_threshold[1]; 3177 for (i=0;i<sub_schurs->n_subs;i++) { 3178 const PetscInt *idxs; 3179 PetscReal upper,lower; 3180 PetscInt j,subset_size,eigs_start = 0; 3181 PetscBLASInt B_N; 3182 PetscBool same_data = PETSC_FALSE; 3183 PetscBool scal = PETSC_FALSE; 3184 3185 if (pcbddc->use_deluxe_scaling) { 3186 upper = PETSC_MAX_REAL; 3187 lower = uthresh; 3188 } else { 3189 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3190 upper = 1./uthresh; 3191 lower = 0.; 3192 } 3193 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3194 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3195 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3196 /* this is experimental: we assume the dofs have been properly grouped to have 3197 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3198 if (!sub_schurs->is_posdef) { 3199 Mat T; 3200 3201 for (j=0;j<subset_size;j++) { 3202 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3204 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3205 ierr = MatDestroy(&T);CHKERRQ(ierr); 3206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3207 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3208 ierr = MatDestroy(&T);CHKERRQ(ierr); 3209 if (sub_schurs->change_primal_sub) { 3210 PetscInt nz,k; 3211 const PetscInt *idxs; 3212 3213 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3214 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3215 for (k=0;k<nz;k++) { 3216 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3217 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3218 } 3219 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3220 } 3221 scal = PETSC_TRUE; 3222 break; 3223 } 3224 } 3225 } 3226 3227 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3228 if (sub_schurs->is_symmetric) { 3229 PetscInt j,k; 3230 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3231 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3232 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3233 } 3234 for (j=0;j<subset_size;j++) { 3235 for (k=j;k<subset_size;k++) { 3236 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3237 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3238 } 3239 } 3240 } else { 3241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3243 } 3244 } else { 3245 S = Sarray + cumarray; 3246 St = Starray + cumarray; 3247 } 3248 /* see if we can save some work */ 3249 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3250 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3251 } 3252 3253 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3254 B_neigs = 0; 3255 } else { 3256 if (sub_schurs->is_symmetric) { 3257 PetscBLASInt B_itype = 1; 3258 PetscBLASInt B_IL, B_IU; 3259 PetscReal eps = -1.0; /* dlamch? */ 3260 PetscInt nmin_s; 3261 PetscBool compute_range; 3262 3263 B_neigs = 0; 3264 compute_range = (PetscBool)!same_data; 3265 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3266 3267 if (pcbddc->dbg_flag) { 3268 PetscInt nc = 0; 3269 3270 if (sub_schurs->change_primal_sub) { 3271 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3272 } 3273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3274 } 3275 3276 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3277 if (compute_range) { 3278 3279 /* ask for eigenvalues larger than thresh */ 3280 if (sub_schurs->is_posdef) { 3281 #if defined(PETSC_USE_COMPLEX) 3282 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3283 #else 3284 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3285 #endif 3286 } else { /* no theory so far, but it works nicely */ 3287 PetscInt recipe = 0,recipe_m = 1; 3288 PetscReal bb[2]; 3289 3290 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3291 switch (recipe) { 3292 case 0: 3293 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3294 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3295 #if defined(PETSC_USE_COMPLEX) 3296 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3297 #else 3298 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3299 #endif 3300 break; 3301 case 1: 3302 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3303 #if defined(PETSC_USE_COMPLEX) 3304 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3305 #else 3306 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3307 #endif 3308 if (!scal) { 3309 PetscBLASInt B_neigs2 = 0; 3310 3311 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3312 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3313 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3314 #if defined(PETSC_USE_COMPLEX) 3315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3316 #else 3317 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3318 #endif 3319 B_neigs += B_neigs2; 3320 } 3321 break; 3322 case 2: 3323 if (scal) { 3324 bb[0] = PETSC_MIN_REAL; 3325 bb[1] = 0; 3326 #if defined(PETSC_USE_COMPLEX) 3327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3328 #else 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3330 #endif 3331 } else { 3332 PetscBLASInt B_neigs2 = 0; 3333 PetscBool import = PETSC_FALSE; 3334 3335 lthresh = PetscMax(lthresh,0.0); 3336 if (lthresh > 0.0) { 3337 bb[0] = PETSC_MIN_REAL; 3338 bb[1] = lthresh*lthresh; 3339 3340 import = PETSC_TRUE; 3341 #if defined(PETSC_USE_COMPLEX) 3342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3343 #else 3344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3345 #endif 3346 } 3347 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3348 bb[1] = PETSC_MAX_REAL; 3349 if (import) { 3350 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3351 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3352 } 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3355 #else 3356 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3357 #endif 3358 B_neigs += B_neigs2; 3359 } 3360 break; 3361 case 3: 3362 if (scal) { 3363 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3364 } else { 3365 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3366 } 3367 if (!scal) { 3368 bb[0] = uthresh; 3369 bb[1] = PETSC_MAX_REAL; 3370 #if defined(PETSC_USE_COMPLEX) 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3372 #else 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3374 #endif 3375 } 3376 if (recipe_m > 0 && B_N - B_neigs > 0) { 3377 PetscBLASInt B_neigs2 = 0; 3378 3379 B_IL = 1; 3380 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3381 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 default: 3392 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3393 break; 3394 } 3395 } 3396 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3397 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3398 B_IL = 1; 3399 #if defined(PETSC_USE_COMPLEX) 3400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3401 #else 3402 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3403 #endif 3404 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3405 PetscInt k; 3406 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3407 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3408 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3409 nmin = nmax; 3410 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3411 for (k=0;k<nmax;k++) { 3412 eigs[k] = 1./PETSC_SMALL; 3413 eigv[k*(subset_size+1)] = 1.0; 3414 } 3415 } 3416 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3417 if (B_ierr) { 3418 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3419 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3420 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3421 } 3422 3423 if (B_neigs > nmax) { 3424 if (pcbddc->dbg_flag) { 3425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3426 } 3427 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3428 B_neigs = nmax; 3429 } 3430 3431 nmin_s = PetscMin(nmin,B_N); 3432 if (B_neigs < nmin_s) { 3433 PetscBLASInt B_neigs2 = 0; 3434 3435 if (pcbddc->use_deluxe_scaling) { 3436 if (scal) { 3437 B_IU = nmin_s; 3438 B_IL = B_neigs + 1; 3439 } else { 3440 B_IL = B_N - nmin_s + 1; 3441 B_IU = B_N - B_neigs; 3442 } 3443 } else { 3444 B_IL = B_neigs + 1; 3445 B_IU = nmin_s; 3446 } 3447 if (pcbddc->dbg_flag) { 3448 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3449 } 3450 if (sub_schurs->is_symmetric) { 3451 PetscInt j,k; 3452 for (j=0;j<subset_size;j++) { 3453 for (k=j;k<subset_size;k++) { 3454 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3455 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3456 } 3457 } 3458 } else { 3459 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3460 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3461 } 3462 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3463 #if defined(PETSC_USE_COMPLEX) 3464 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3465 #else 3466 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3467 #endif 3468 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3469 B_neigs += B_neigs2; 3470 } 3471 if (B_ierr) { 3472 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3473 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3474 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3475 } 3476 if (pcbddc->dbg_flag) { 3477 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3478 for (j=0;j<B_neigs;j++) { 3479 if (eigs[j] == 0.0) { 3480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3481 } else { 3482 if (pcbddc->use_deluxe_scaling) { 3483 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3484 } else { 3485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3486 } 3487 } 3488 } 3489 } 3490 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3491 } 3492 /* change the basis back to the original one */ 3493 if (sub_schurs->change) { 3494 Mat change,phi,phit; 3495 3496 if (pcbddc->dbg_flag > 2) { 3497 PetscInt ii; 3498 for (ii=0;ii<B_neigs;ii++) { 3499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3500 for (j=0;j<B_N;j++) { 3501 #if defined(PETSC_USE_COMPLEX) 3502 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3503 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3505 #else 3506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3507 #endif 3508 } 3509 } 3510 } 3511 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3512 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3513 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3514 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3515 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3516 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3517 } 3518 maxneigs = PetscMax(B_neigs,maxneigs); 3519 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3520 if (B_neigs) { 3521 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3522 3523 if (pcbddc->dbg_flag > 1) { 3524 PetscInt ii; 3525 for (ii=0;ii<B_neigs;ii++) { 3526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3527 for (j=0;j<B_N;j++) { 3528 #if defined(PETSC_USE_COMPLEX) 3529 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3530 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3532 #else 3533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3534 #endif 3535 } 3536 } 3537 } 3538 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3539 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3540 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3541 cum++; 3542 } 3543 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3544 /* shift for next computation */ 3545 cumarray += subset_size*subset_size; 3546 } 3547 if (pcbddc->dbg_flag) { 3548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3549 } 3550 3551 if (mss) { 3552 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3553 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3554 /* destroy matrices (junk) */ 3555 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3556 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3557 } 3558 if (allocated_S_St) { 3559 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3560 } 3561 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3562 #if defined(PETSC_USE_COMPLEX) 3563 ierr = PetscFree(rwork);CHKERRQ(ierr); 3564 #endif 3565 if (pcbddc->dbg_flag) { 3566 PetscInt maxneigs_r; 3567 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3569 } 3570 PetscFunctionReturn(0); 3571 } 3572 3573 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3574 { 3575 PetscScalar *coarse_submat_vals; 3576 PetscErrorCode ierr; 3577 3578 PetscFunctionBegin; 3579 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3580 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3581 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3582 3583 /* Setup local neumann solver ksp_R */ 3584 /* PCBDDCSetUpLocalScatters should be called first! */ 3585 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3586 3587 /* 3588 Setup local correction and local part of coarse basis. 3589 Gives back the dense local part of the coarse matrix in column major ordering 3590 */ 3591 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3592 3593 /* Compute total number of coarse nodes and setup coarse solver */ 3594 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3595 3596 /* free */ 3597 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3598 PetscFunctionReturn(0); 3599 } 3600 3601 PetscErrorCode PCBDDCResetCustomization(PC pc) 3602 { 3603 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3604 PetscErrorCode ierr; 3605 3606 PetscFunctionBegin; 3607 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3608 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3609 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3610 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3611 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3612 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3613 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3614 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3615 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3616 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3617 PetscFunctionReturn(0); 3618 } 3619 3620 PetscErrorCode PCBDDCResetTopography(PC pc) 3621 { 3622 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3623 PetscInt i; 3624 PetscErrorCode ierr; 3625 3626 PetscFunctionBegin; 3627 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3628 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3629 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3630 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3631 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3632 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3633 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3634 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3635 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3636 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3637 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3638 for (i=0;i<pcbddc->n_local_subs;i++) { 3639 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3640 } 3641 pcbddc->n_local_subs = 0; 3642 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3643 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3644 pcbddc->graphanalyzed = PETSC_FALSE; 3645 pcbddc->recompute_topography = PETSC_TRUE; 3646 pcbddc->corner_selected = PETSC_FALSE; 3647 PetscFunctionReturn(0); 3648 } 3649 3650 PetscErrorCode PCBDDCResetSolvers(PC pc) 3651 { 3652 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3653 PetscErrorCode ierr; 3654 3655 PetscFunctionBegin; 3656 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3657 if (pcbddc->coarse_phi_B) { 3658 PetscScalar *array; 3659 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3660 ierr = PetscFree(array);CHKERRQ(ierr); 3661 } 3662 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3663 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3664 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3665 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3666 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3667 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3668 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3669 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3670 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3671 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3672 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3673 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3674 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3675 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3676 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3677 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3678 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3679 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3680 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3681 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3682 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3683 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3684 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3685 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3686 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3687 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3688 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3689 if (pcbddc->benign_zerodiag_subs) { 3690 PetscInt i; 3691 for (i=0;i<pcbddc->benign_n;i++) { 3692 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3693 } 3694 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3695 } 3696 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3697 PetscFunctionReturn(0); 3698 } 3699 3700 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3701 { 3702 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3703 PC_IS *pcis = (PC_IS*)pc->data; 3704 VecType impVecType; 3705 PetscInt n_constraints,n_R,old_size; 3706 PetscErrorCode ierr; 3707 3708 PetscFunctionBegin; 3709 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3710 n_R = pcis->n - pcbddc->n_vertices; 3711 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3712 /* local work vectors (try to avoid unneeded work)*/ 3713 /* R nodes */ 3714 old_size = -1; 3715 if (pcbddc->vec1_R) { 3716 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3717 } 3718 if (n_R != old_size) { 3719 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3720 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3721 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3722 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3723 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3724 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3725 } 3726 /* local primal dofs */ 3727 old_size = -1; 3728 if (pcbddc->vec1_P) { 3729 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3730 } 3731 if (pcbddc->local_primal_size != old_size) { 3732 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3733 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3734 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3735 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3736 } 3737 /* local explicit constraints */ 3738 old_size = -1; 3739 if (pcbddc->vec1_C) { 3740 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3741 } 3742 if (n_constraints && n_constraints != old_size) { 3743 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3744 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3745 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3746 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3747 } 3748 PetscFunctionReturn(0); 3749 } 3750 3751 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3752 { 3753 PetscErrorCode ierr; 3754 /* pointers to pcis and pcbddc */ 3755 PC_IS* pcis = (PC_IS*)pc->data; 3756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3757 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3758 /* submatrices of local problem */ 3759 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3760 /* submatrices of local coarse problem */ 3761 Mat S_VV,S_CV,S_VC,S_CC; 3762 /* working matrices */ 3763 Mat C_CR; 3764 /* additional working stuff */ 3765 PC pc_R; 3766 Mat F,Brhs = NULL; 3767 Vec dummy_vec; 3768 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3769 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3770 PetscScalar *work; 3771 PetscInt *idx_V_B; 3772 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3773 PetscInt i,n_R,n_D,n_B; 3774 3775 /* some shortcuts to scalars */ 3776 PetscScalar one=1.0,m_one=-1.0; 3777 3778 PetscFunctionBegin; 3779 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3780 3781 /* Set Non-overlapping dimensions */ 3782 n_vertices = pcbddc->n_vertices; 3783 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3784 n_B = pcis->n_B; 3785 n_D = pcis->n - n_B; 3786 n_R = pcis->n - n_vertices; 3787 3788 /* vertices in boundary numbering */ 3789 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3790 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3791 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3792 3793 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3794 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3796 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3797 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3798 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3799 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3800 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3802 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3803 3804 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3805 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3806 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3807 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3808 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3809 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3810 lda_rhs = n_R; 3811 need_benign_correction = PETSC_FALSE; 3812 if (isLU || isILU || isCHOL) { 3813 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3814 } else if (sub_schurs && sub_schurs->reuse_solver) { 3815 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3816 MatFactorType type; 3817 3818 F = reuse_solver->F; 3819 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3820 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3821 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3822 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3823 } else { 3824 F = NULL; 3825 } 3826 3827 /* determine if we can use a sparse right-hand side */ 3828 sparserhs = PETSC_FALSE; 3829 if (F) { 3830 MatSolverType solver; 3831 3832 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3833 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3834 } 3835 3836 /* allocate workspace */ 3837 n = 0; 3838 if (n_constraints) { 3839 n += lda_rhs*n_constraints; 3840 } 3841 if (n_vertices) { 3842 n = PetscMax(2*lda_rhs*n_vertices,n); 3843 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3844 } 3845 if (!pcbddc->symmetric_primal) { 3846 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3847 } 3848 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3849 3850 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3851 dummy_vec = NULL; 3852 if (need_benign_correction && lda_rhs != n_R && F) { 3853 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3854 } 3855 3856 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3857 if (n_constraints) { 3858 Mat M3,C_B; 3859 IS is_aux; 3860 PetscScalar *array,*array2; 3861 3862 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3863 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3864 3865 /* Extract constraints on R nodes: C_{CR} */ 3866 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3867 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3868 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3869 3870 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3871 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3872 if (!sparserhs) { 3873 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3874 for (i=0;i<n_constraints;i++) { 3875 const PetscScalar *row_cmat_values; 3876 const PetscInt *row_cmat_indices; 3877 PetscInt size_of_constraint,j; 3878 3879 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3880 for (j=0;j<size_of_constraint;j++) { 3881 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3882 } 3883 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3884 } 3885 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3886 } else { 3887 Mat tC_CR; 3888 3889 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3890 if (lda_rhs != n_R) { 3891 PetscScalar *aa; 3892 PetscInt r,*ii,*jj; 3893 PetscBool done; 3894 3895 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3896 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3897 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3898 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3899 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3900 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3901 } else { 3902 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3903 tC_CR = C_CR; 3904 } 3905 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3906 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3907 } 3908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3909 if (F) { 3910 if (need_benign_correction) { 3911 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3912 3913 /* rhs is already zero on interior dofs, no need to change the rhs */ 3914 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3915 } 3916 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3917 if (need_benign_correction) { 3918 PetscScalar *marr; 3919 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3920 3921 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3922 if (lda_rhs != n_R) { 3923 for (i=0;i<n_constraints;i++) { 3924 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3925 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3926 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3927 } 3928 } else { 3929 for (i=0;i<n_constraints;i++) { 3930 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3931 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3932 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3933 } 3934 } 3935 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3936 } 3937 } else { 3938 PetscScalar *marr; 3939 3940 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3941 for (i=0;i<n_constraints;i++) { 3942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3943 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3944 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3945 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3946 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3947 } 3948 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3949 } 3950 if (sparserhs) { 3951 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3952 } 3953 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3954 if (!pcbddc->switch_static) { 3955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3956 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3957 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3958 for (i=0;i<n_constraints;i++) { 3959 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3960 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3961 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3962 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3963 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3965 } 3966 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3967 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3968 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3969 } else { 3970 if (lda_rhs != n_R) { 3971 IS dummy; 3972 3973 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3974 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3975 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3976 } else { 3977 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3978 pcbddc->local_auxmat2 = local_auxmat2_R; 3979 } 3980 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3981 } 3982 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3983 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3984 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3985 if (isCHOL) { 3986 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3987 } else { 3988 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3989 } 3990 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3991 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3992 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3993 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3994 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3995 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3996 } 3997 3998 /* Get submatrices from subdomain matrix */ 3999 if (n_vertices) { 4000 IS is_aux; 4001 PetscBool isseqaij; 4002 4003 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4004 IS tis; 4005 4006 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4007 ierr = ISSort(tis);CHKERRQ(ierr); 4008 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4009 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4010 } else { 4011 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4012 } 4013 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4014 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4015 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4016 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4017 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4018 } 4019 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4020 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4021 } 4022 4023 /* Matrix of coarse basis functions (local) */ 4024 if (pcbddc->coarse_phi_B) { 4025 PetscInt on_B,on_primal,on_D=n_D; 4026 if (pcbddc->coarse_phi_D) { 4027 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4028 } 4029 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4030 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4031 PetscScalar *marray; 4032 4033 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4034 ierr = PetscFree(marray);CHKERRQ(ierr); 4035 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4036 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4037 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4038 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4039 } 4040 } 4041 4042 if (!pcbddc->coarse_phi_B) { 4043 PetscScalar *marr; 4044 4045 /* memory size */ 4046 n = n_B*pcbddc->local_primal_size; 4047 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4048 if (!pcbddc->symmetric_primal) n *= 2; 4049 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4050 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4051 marr += n_B*pcbddc->local_primal_size; 4052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4054 marr += n_D*pcbddc->local_primal_size; 4055 } 4056 if (!pcbddc->symmetric_primal) { 4057 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4058 marr += n_B*pcbddc->local_primal_size; 4059 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4061 } 4062 } else { 4063 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4064 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4065 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4066 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4067 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4068 } 4069 } 4070 } 4071 4072 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4073 p0_lidx_I = NULL; 4074 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4075 const PetscInt *idxs; 4076 4077 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4078 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4079 for (i=0;i<pcbddc->benign_n;i++) { 4080 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4081 } 4082 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4083 } 4084 4085 /* vertices */ 4086 if (n_vertices) { 4087 PetscBool restoreavr = PETSC_FALSE; 4088 4089 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4090 4091 if (n_R) { 4092 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4093 PetscBLASInt B_N,B_one = 1; 4094 PetscScalar *x,*y; 4095 4096 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4097 if (need_benign_correction) { 4098 ISLocalToGlobalMapping RtoN; 4099 IS is_p0; 4100 PetscInt *idxs_p0,n; 4101 4102 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4103 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4104 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4105 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4106 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4107 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4108 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4109 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4110 } 4111 4112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4113 if (!sparserhs || need_benign_correction) { 4114 if (lda_rhs == n_R) { 4115 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4116 } else { 4117 PetscScalar *av,*array; 4118 const PetscInt *xadj,*adjncy; 4119 PetscInt n; 4120 PetscBool flg_row; 4121 4122 array = work+lda_rhs*n_vertices; 4123 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4124 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4125 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4126 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4127 for (i=0;i<n;i++) { 4128 PetscInt j; 4129 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4130 } 4131 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4132 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4133 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4134 } 4135 if (need_benign_correction) { 4136 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4137 PetscScalar *marr; 4138 4139 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4140 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4141 4142 | 0 0 0 | (V) 4143 L = | 0 0 -1 | (P-p0) 4144 | 0 0 -1 | (p0) 4145 4146 */ 4147 for (i=0;i<reuse_solver->benign_n;i++) { 4148 const PetscScalar *vals; 4149 const PetscInt *idxs,*idxs_zero; 4150 PetscInt n,j,nz; 4151 4152 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4153 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4154 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4155 for (j=0;j<n;j++) { 4156 PetscScalar val = vals[j]; 4157 PetscInt k,col = idxs[j]; 4158 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4159 } 4160 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4161 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4162 } 4163 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4164 } 4165 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4166 Brhs = A_RV; 4167 } else { 4168 Mat tA_RVT,A_RVT; 4169 4170 if (!pcbddc->symmetric_primal) { 4171 /* A_RV already scaled by -1 */ 4172 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4173 } else { 4174 restoreavr = PETSC_TRUE; 4175 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4176 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4177 A_RVT = A_VR; 4178 } 4179 if (lda_rhs != n_R) { 4180 PetscScalar *aa; 4181 PetscInt r,*ii,*jj; 4182 PetscBool done; 4183 4184 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4185 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4186 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4187 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4188 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4189 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4190 } else { 4191 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4192 tA_RVT = A_RVT; 4193 } 4194 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4195 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4196 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4197 } 4198 if (F) { 4199 /* need to correct the rhs */ 4200 if (need_benign_correction) { 4201 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4202 PetscScalar *marr; 4203 4204 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4205 if (lda_rhs != n_R) { 4206 for (i=0;i<n_vertices;i++) { 4207 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4208 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4209 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4210 } 4211 } else { 4212 for (i=0;i<n_vertices;i++) { 4213 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4214 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4215 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4216 } 4217 } 4218 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4219 } 4220 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4221 if (restoreavr) { 4222 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4223 } 4224 /* need to correct the solution */ 4225 if (need_benign_correction) { 4226 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4227 PetscScalar *marr; 4228 4229 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4230 if (lda_rhs != n_R) { 4231 for (i=0;i<n_vertices;i++) { 4232 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4233 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4234 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4235 } 4236 } else { 4237 for (i=0;i<n_vertices;i++) { 4238 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4239 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4240 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4241 } 4242 } 4243 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4244 } 4245 } else { 4246 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4247 for (i=0;i<n_vertices;i++) { 4248 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4249 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4250 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4251 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4252 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4253 } 4254 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4255 } 4256 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4257 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4258 /* S_VV and S_CV */ 4259 if (n_constraints) { 4260 Mat B; 4261 4262 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4263 for (i=0;i<n_vertices;i++) { 4264 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4265 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4266 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4267 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4268 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4269 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4270 } 4271 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4272 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4273 ierr = MatDestroy(&B);CHKERRQ(ierr); 4274 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4275 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4276 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4277 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4278 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4279 ierr = MatDestroy(&B);CHKERRQ(ierr); 4280 } 4281 if (lda_rhs != n_R) { 4282 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4283 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4284 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4285 } 4286 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4287 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4288 if (need_benign_correction) { 4289 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4290 PetscScalar *marr,*sums; 4291 4292 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4293 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4294 for (i=0;i<reuse_solver->benign_n;i++) { 4295 const PetscScalar *vals; 4296 const PetscInt *idxs,*idxs_zero; 4297 PetscInt n,j,nz; 4298 4299 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4300 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4301 for (j=0;j<n_vertices;j++) { 4302 PetscInt k; 4303 sums[j] = 0.; 4304 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4305 } 4306 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4307 for (j=0;j<n;j++) { 4308 PetscScalar val = vals[j]; 4309 PetscInt k; 4310 for (k=0;k<n_vertices;k++) { 4311 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4312 } 4313 } 4314 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4315 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4316 } 4317 ierr = PetscFree(sums);CHKERRQ(ierr); 4318 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4319 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4320 } 4321 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4322 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4323 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4324 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4325 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4326 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4327 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4328 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4329 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4330 } else { 4331 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4332 } 4333 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4334 4335 /* coarse basis functions */ 4336 for (i=0;i<n_vertices;i++) { 4337 PetscScalar *y; 4338 4339 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4340 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4341 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4342 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4343 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4344 y[n_B*i+idx_V_B[i]] = 1.0; 4345 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4346 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4347 4348 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4349 PetscInt j; 4350 4351 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4352 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4353 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4354 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4355 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4356 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4357 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4358 } 4359 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4360 } 4361 /* if n_R == 0 the object is not destroyed */ 4362 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4363 } 4364 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4365 4366 if (n_constraints) { 4367 Mat B; 4368 4369 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4370 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4371 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4372 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4373 if (n_vertices) { 4374 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4375 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4376 } else { 4377 Mat S_VCt; 4378 4379 if (lda_rhs != n_R) { 4380 ierr = MatDestroy(&B);CHKERRQ(ierr); 4381 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4382 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4383 } 4384 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4385 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4386 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4387 } 4388 } 4389 ierr = MatDestroy(&B);CHKERRQ(ierr); 4390 /* coarse basis functions */ 4391 for (i=0;i<n_constraints;i++) { 4392 PetscScalar *y; 4393 4394 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4395 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4396 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4397 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4398 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4399 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4400 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4402 PetscInt j; 4403 4404 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4405 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4406 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4407 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4408 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4409 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4410 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4411 } 4412 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4413 } 4414 } 4415 if (n_constraints) { 4416 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4417 } 4418 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4419 4420 /* coarse matrix entries relative to B_0 */ 4421 if (pcbddc->benign_n) { 4422 Mat B0_B,B0_BPHI; 4423 IS is_dummy; 4424 PetscScalar *data; 4425 PetscInt j; 4426 4427 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4428 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4429 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4430 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4431 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4432 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4433 for (j=0;j<pcbddc->benign_n;j++) { 4434 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4435 for (i=0;i<pcbddc->local_primal_size;i++) { 4436 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4437 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4438 } 4439 } 4440 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4441 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4442 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4443 } 4444 4445 /* compute other basis functions for non-symmetric problems */ 4446 if (!pcbddc->symmetric_primal) { 4447 Mat B_V=NULL,B_C=NULL; 4448 PetscScalar *marray; 4449 4450 if (n_constraints) { 4451 Mat S_CCT,C_CRT; 4452 4453 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4454 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4455 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4456 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4457 if (n_vertices) { 4458 Mat S_VCT; 4459 4460 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4461 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4462 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4463 } 4464 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4465 } else { 4466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4467 } 4468 if (n_vertices && n_R) { 4469 PetscScalar *av,*marray; 4470 const PetscInt *xadj,*adjncy; 4471 PetscInt n; 4472 PetscBool flg_row; 4473 4474 /* B_V = B_V - A_VR^T */ 4475 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4476 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4477 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4478 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4479 for (i=0;i<n;i++) { 4480 PetscInt j; 4481 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4482 } 4483 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4484 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4485 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4486 } 4487 4488 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4489 if (n_vertices) { 4490 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4491 for (i=0;i<n_vertices;i++) { 4492 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4493 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4494 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4495 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4496 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4497 } 4498 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4499 } 4500 if (B_C) { 4501 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4502 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4503 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4504 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4505 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4506 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4507 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4508 } 4509 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4510 } 4511 /* coarse basis functions */ 4512 for (i=0;i<pcbddc->local_primal_size;i++) { 4513 PetscScalar *y; 4514 4515 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4516 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4517 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4518 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4519 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4520 if (i<n_vertices) { 4521 y[n_B*i+idx_V_B[i]] = 1.0; 4522 } 4523 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4524 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4525 4526 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4527 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4528 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4529 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4530 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4531 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4532 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4533 } 4534 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4535 } 4536 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4537 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4538 } 4539 4540 /* free memory */ 4541 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4542 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4543 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4544 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4545 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4546 ierr = PetscFree(work);CHKERRQ(ierr); 4547 if (n_vertices) { 4548 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4549 } 4550 if (n_constraints) { 4551 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4552 } 4553 /* Checking coarse_sub_mat and coarse basis functios */ 4554 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4555 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4556 if (pcbddc->dbg_flag) { 4557 Mat coarse_sub_mat; 4558 Mat AUXMAT,TM1,TM2,TM3,TM4; 4559 Mat coarse_phi_D,coarse_phi_B; 4560 Mat coarse_psi_D,coarse_psi_B; 4561 Mat A_II,A_BB,A_IB,A_BI; 4562 Mat C_B,CPHI; 4563 IS is_dummy; 4564 Vec mones; 4565 MatType checkmattype=MATSEQAIJ; 4566 PetscReal real_value; 4567 4568 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4569 Mat A; 4570 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4571 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4572 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4573 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4574 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4575 ierr = MatDestroy(&A);CHKERRQ(ierr); 4576 } else { 4577 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4578 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4579 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4580 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4581 } 4582 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4583 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4584 if (!pcbddc->symmetric_primal) { 4585 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4586 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4587 } 4588 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4589 4590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 if (!pcbddc->symmetric_primal) { 4594 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4595 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4597 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4598 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4600 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4601 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4603 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4604 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4605 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4606 } else { 4607 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4608 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4609 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4610 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4611 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4612 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4613 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4614 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4615 } 4616 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4617 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4618 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4619 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4620 if (pcbddc->benign_n) { 4621 Mat B0_B,B0_BPHI; 4622 PetscScalar *data,*data2; 4623 PetscInt j; 4624 4625 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4626 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4627 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4628 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4629 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4630 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4631 for (j=0;j<pcbddc->benign_n;j++) { 4632 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4633 for (i=0;i<pcbddc->local_primal_size;i++) { 4634 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4635 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4636 } 4637 } 4638 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4639 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4640 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4641 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4642 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4643 } 4644 #if 0 4645 { 4646 PetscViewer viewer; 4647 char filename[256]; 4648 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4649 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4650 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4651 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4652 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4653 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4654 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4655 if (pcbddc->coarse_phi_B) { 4656 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4657 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4658 } 4659 if (pcbddc->coarse_phi_D) { 4660 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4661 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4662 } 4663 if (pcbddc->coarse_psi_B) { 4664 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4665 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4666 } 4667 if (pcbddc->coarse_psi_D) { 4668 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4669 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4670 } 4671 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4672 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4673 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4674 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4675 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4676 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4677 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4678 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4679 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4680 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4681 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4682 } 4683 #endif 4684 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4685 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4686 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4688 4689 /* check constraints */ 4690 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4691 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4692 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4693 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4694 } else { 4695 PetscScalar *data; 4696 Mat tmat; 4697 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4699 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4700 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4701 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4702 } 4703 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4704 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4705 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4706 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4708 if (!pcbddc->symmetric_primal) { 4709 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4710 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4711 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4712 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4714 } 4715 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4716 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4717 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4718 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4719 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4720 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4721 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4722 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4723 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4724 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4725 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4726 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4727 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4728 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4729 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4730 if (!pcbddc->symmetric_primal) { 4731 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4732 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4733 } 4734 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4735 } 4736 /* get back data */ 4737 *coarse_submat_vals_n = coarse_submat_vals; 4738 PetscFunctionReturn(0); 4739 } 4740 4741 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4742 { 4743 Mat *work_mat; 4744 IS isrow_s,iscol_s; 4745 PetscBool rsorted,csorted; 4746 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4747 PetscErrorCode ierr; 4748 4749 PetscFunctionBegin; 4750 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4751 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4752 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4753 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4754 4755 if (!rsorted) { 4756 const PetscInt *idxs; 4757 PetscInt *idxs_sorted,i; 4758 4759 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4760 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4761 for (i=0;i<rsize;i++) { 4762 idxs_perm_r[i] = i; 4763 } 4764 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4765 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4766 for (i=0;i<rsize;i++) { 4767 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4768 } 4769 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4770 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4771 } else { 4772 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4773 isrow_s = isrow; 4774 } 4775 4776 if (!csorted) { 4777 if (isrow == iscol) { 4778 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4779 iscol_s = isrow_s; 4780 } else { 4781 const PetscInt *idxs; 4782 PetscInt *idxs_sorted,i; 4783 4784 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4785 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4786 for (i=0;i<csize;i++) { 4787 idxs_perm_c[i] = i; 4788 } 4789 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4790 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4791 for (i=0;i<csize;i++) { 4792 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4793 } 4794 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4795 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4796 } 4797 } else { 4798 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4799 iscol_s = iscol; 4800 } 4801 4802 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4803 4804 if (!rsorted || !csorted) { 4805 Mat new_mat; 4806 IS is_perm_r,is_perm_c; 4807 4808 if (!rsorted) { 4809 PetscInt *idxs_r,i; 4810 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4811 for (i=0;i<rsize;i++) { 4812 idxs_r[idxs_perm_r[i]] = i; 4813 } 4814 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4815 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4816 } else { 4817 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4818 } 4819 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4820 4821 if (!csorted) { 4822 if (isrow_s == iscol_s) { 4823 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4824 is_perm_c = is_perm_r; 4825 } else { 4826 PetscInt *idxs_c,i; 4827 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4828 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4829 for (i=0;i<csize;i++) { 4830 idxs_c[idxs_perm_c[i]] = i; 4831 } 4832 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4833 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4834 } 4835 } else { 4836 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4837 } 4838 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4839 4840 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4841 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4842 work_mat[0] = new_mat; 4843 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4844 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4845 } 4846 4847 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4848 *B = work_mat[0]; 4849 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4850 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4851 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4856 { 4857 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4858 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4859 Mat new_mat,lA; 4860 IS is_local,is_global; 4861 PetscInt local_size; 4862 PetscBool isseqaij; 4863 PetscErrorCode ierr; 4864 4865 PetscFunctionBegin; 4866 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4867 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4868 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4869 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4870 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4871 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4872 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4873 4874 /* check */ 4875 if (pcbddc->dbg_flag) { 4876 Vec x,x_change; 4877 PetscReal error; 4878 4879 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4880 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4881 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4882 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4883 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4884 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4885 if (!pcbddc->change_interior) { 4886 const PetscScalar *x,*y,*v; 4887 PetscReal lerror = 0.; 4888 PetscInt i; 4889 4890 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4891 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4892 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4893 for (i=0;i<local_size;i++) 4894 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4895 lerror = PetscAbsScalar(x[i]-y[i]); 4896 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4897 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4898 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4899 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4900 if (error > PETSC_SMALL) { 4901 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4902 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4903 } else { 4904 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4905 } 4906 } 4907 } 4908 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4909 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4910 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4911 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4912 if (error > PETSC_SMALL) { 4913 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4914 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4915 } else { 4916 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4917 } 4918 } 4919 ierr = VecDestroy(&x);CHKERRQ(ierr); 4920 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4921 } 4922 4923 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4924 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4925 4926 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4927 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4928 if (isseqaij) { 4929 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4930 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4931 if (lA) { 4932 Mat work; 4933 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4934 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4935 ierr = MatDestroy(&work);CHKERRQ(ierr); 4936 } 4937 } else { 4938 Mat work_mat; 4939 4940 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4941 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4942 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4943 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4944 if (lA) { 4945 Mat work; 4946 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4947 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4948 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4949 ierr = MatDestroy(&work);CHKERRQ(ierr); 4950 } 4951 } 4952 if (matis->A->symmetric_set) { 4953 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4954 #if !defined(PETSC_USE_COMPLEX) 4955 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4956 #endif 4957 } 4958 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4963 { 4964 PC_IS* pcis = (PC_IS*)(pc->data); 4965 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4966 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4967 PetscInt *idx_R_local=NULL; 4968 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4969 PetscInt vbs,bs; 4970 PetscBT bitmask=NULL; 4971 PetscErrorCode ierr; 4972 4973 PetscFunctionBegin; 4974 /* 4975 No need to setup local scatters if 4976 - primal space is unchanged 4977 AND 4978 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4979 AND 4980 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4981 */ 4982 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4983 PetscFunctionReturn(0); 4984 } 4985 /* destroy old objects */ 4986 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4987 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4988 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4989 /* Set Non-overlapping dimensions */ 4990 n_B = pcis->n_B; 4991 n_D = pcis->n - n_B; 4992 n_vertices = pcbddc->n_vertices; 4993 4994 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4995 4996 /* create auxiliary bitmask and allocate workspace */ 4997 if (!sub_schurs || !sub_schurs->reuse_solver) { 4998 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4999 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5000 for (i=0;i<n_vertices;i++) { 5001 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5002 } 5003 5004 for (i=0, n_R=0; i<pcis->n; i++) { 5005 if (!PetscBTLookup(bitmask,i)) { 5006 idx_R_local[n_R++] = i; 5007 } 5008 } 5009 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5010 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5011 5012 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5013 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5014 } 5015 5016 /* Block code */ 5017 vbs = 1; 5018 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5019 if (bs>1 && !(n_vertices%bs)) { 5020 PetscBool is_blocked = PETSC_TRUE; 5021 PetscInt *vary; 5022 if (!sub_schurs || !sub_schurs->reuse_solver) { 5023 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5024 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5025 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5026 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5027 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5028 for (i=0; i<pcis->n/bs; i++) { 5029 if (vary[i]!=0 && vary[i]!=bs) { 5030 is_blocked = PETSC_FALSE; 5031 break; 5032 } 5033 } 5034 ierr = PetscFree(vary);CHKERRQ(ierr); 5035 } else { 5036 /* Verify directly the R set */ 5037 for (i=0; i<n_R/bs; i++) { 5038 PetscInt j,node=idx_R_local[bs*i]; 5039 for (j=1; j<bs; j++) { 5040 if (node != idx_R_local[bs*i+j]-j) { 5041 is_blocked = PETSC_FALSE; 5042 break; 5043 } 5044 } 5045 } 5046 } 5047 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5048 vbs = bs; 5049 for (i=0;i<n_R/vbs;i++) { 5050 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5051 } 5052 } 5053 } 5054 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5055 if (sub_schurs && sub_schurs->reuse_solver) { 5056 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5057 5058 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5059 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5060 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5061 reuse_solver->is_R = pcbddc->is_R_local; 5062 } else { 5063 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5064 } 5065 5066 /* print some info if requested */ 5067 if (pcbddc->dbg_flag) { 5068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5070 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5073 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5075 } 5076 5077 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5078 if (!sub_schurs || !sub_schurs->reuse_solver) { 5079 IS is_aux1,is_aux2; 5080 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5081 5082 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5083 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5084 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5085 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5086 for (i=0; i<n_D; i++) { 5087 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5088 } 5089 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5090 for (i=0, j=0; i<n_R; i++) { 5091 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5092 aux_array1[j++] = i; 5093 } 5094 } 5095 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5096 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5097 for (i=0, j=0; i<n_B; i++) { 5098 if (!PetscBTLookup(bitmask,is_indices[i])) { 5099 aux_array2[j++] = i; 5100 } 5101 } 5102 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5103 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5104 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5105 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5106 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5107 5108 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5109 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5110 for (i=0, j=0; i<n_R; i++) { 5111 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5112 aux_array1[j++] = i; 5113 } 5114 } 5115 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5116 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5117 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5118 } 5119 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5120 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5121 } else { 5122 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5123 IS tis; 5124 PetscInt schur_size; 5125 5126 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5127 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5128 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5129 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5130 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5131 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5132 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5133 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5134 } 5135 } 5136 PetscFunctionReturn(0); 5137 } 5138 5139 5140 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5141 { 5142 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5143 PC_IS *pcis = (PC_IS*)pc->data; 5144 PC pc_temp; 5145 Mat A_RR; 5146 MatReuse reuse; 5147 PetscScalar m_one = -1.0; 5148 PetscReal value; 5149 PetscInt n_D,n_R; 5150 PetscBool check_corr,issbaij; 5151 PetscErrorCode ierr; 5152 /* prefixes stuff */ 5153 char dir_prefix[256],neu_prefix[256],str_level[16]; 5154 size_t len; 5155 5156 PetscFunctionBegin; 5157 5158 /* compute prefixes */ 5159 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5160 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5161 if (!pcbddc->current_level) { 5162 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5163 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5164 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5165 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5166 } else { 5167 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5168 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5169 len -= 15; /* remove "pc_bddc_coarse_" */ 5170 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5171 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5172 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5173 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5174 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5175 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5176 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 5177 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5178 } 5179 5180 /* DIRICHLET PROBLEM */ 5181 if (dirichlet) { 5182 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5183 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5184 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5185 if (pcbddc->dbg_flag) { 5186 Mat A_IIn; 5187 5188 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5189 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5190 pcis->A_II = A_IIn; 5191 } 5192 } 5193 if (pcbddc->local_mat->symmetric_set) { 5194 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5195 } 5196 /* Matrix for Dirichlet problem is pcis->A_II */ 5197 n_D = pcis->n - pcis->n_B; 5198 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5199 void (*f)(void) = 0; 5200 5201 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5202 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5203 /* default */ 5204 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5205 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5206 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5207 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5208 if (issbaij) { 5209 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5210 } else { 5211 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5212 } 5213 /* Allow user's customization */ 5214 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5215 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5216 if (f && pcbddc->mat_graph->cloc) { 5217 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5218 const PetscInt *idxs; 5219 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5220 5221 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5222 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5223 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5224 for (i=0;i<nl;i++) { 5225 for (d=0;d<cdim;d++) { 5226 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5227 } 5228 } 5229 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5230 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5231 ierr = PetscFree(scoords);CHKERRQ(ierr); 5232 } 5233 } 5234 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5235 if (sub_schurs && sub_schurs->reuse_solver) { 5236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5237 5238 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5239 } 5240 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5241 if (!n_D) { 5242 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5243 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5244 } 5245 /* set ksp_D into pcis data */ 5246 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5247 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5248 pcis->ksp_D = pcbddc->ksp_D; 5249 } 5250 5251 /* NEUMANN PROBLEM */ 5252 A_RR = 0; 5253 if (neumann) { 5254 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5255 PetscInt ibs,mbs; 5256 PetscBool issbaij, reuse_neumann_solver; 5257 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5258 5259 reuse_neumann_solver = PETSC_FALSE; 5260 if (sub_schurs && sub_schurs->reuse_solver) { 5261 IS iP; 5262 5263 reuse_neumann_solver = PETSC_TRUE; 5264 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5265 if (iP) reuse_neumann_solver = PETSC_FALSE; 5266 } 5267 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5268 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5269 if (pcbddc->ksp_R) { /* already created ksp */ 5270 PetscInt nn_R; 5271 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5272 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5273 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5274 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5275 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5276 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5277 reuse = MAT_INITIAL_MATRIX; 5278 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5279 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5280 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5281 reuse = MAT_INITIAL_MATRIX; 5282 } else { /* safe to reuse the matrix */ 5283 reuse = MAT_REUSE_MATRIX; 5284 } 5285 } 5286 /* last check */ 5287 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5288 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5289 reuse = MAT_INITIAL_MATRIX; 5290 } 5291 } else { /* first time, so we need to create the matrix */ 5292 reuse = MAT_INITIAL_MATRIX; 5293 } 5294 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5295 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5296 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5297 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5298 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5299 if (matis->A == pcbddc->local_mat) { 5300 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5301 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5302 } else { 5303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5304 } 5305 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5306 if (matis->A == pcbddc->local_mat) { 5307 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5308 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5309 } else { 5310 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5311 } 5312 } 5313 /* extract A_RR */ 5314 if (reuse_neumann_solver) { 5315 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5316 5317 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5318 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5319 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5320 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5321 } else { 5322 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5323 } 5324 } else { 5325 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5326 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5327 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5328 } 5329 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5330 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5331 } 5332 if (pcbddc->local_mat->symmetric_set) { 5333 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5334 } 5335 if (!pcbddc->ksp_R) { /* create object if not present */ 5336 void (*f)(void) = 0; 5337 5338 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5339 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5340 /* default */ 5341 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5342 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5343 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5344 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5345 if (issbaij) { 5346 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5347 } else { 5348 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5349 } 5350 /* Allow user's customization */ 5351 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5352 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5353 if (f && pcbddc->mat_graph->cloc) { 5354 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5355 const PetscInt *idxs; 5356 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5357 5358 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5359 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5360 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5361 for (i=0;i<nl;i++) { 5362 for (d=0;d<cdim;d++) { 5363 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5364 } 5365 } 5366 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5367 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5368 ierr = PetscFree(scoords);CHKERRQ(ierr); 5369 } 5370 } 5371 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5372 if (!n_R) { 5373 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5374 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5375 } 5376 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5377 /* Reuse solver if it is present */ 5378 if (reuse_neumann_solver) { 5379 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5380 5381 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5382 } 5383 } 5384 5385 if (pcbddc->dbg_flag) { 5386 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5387 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5388 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5389 } 5390 5391 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5392 check_corr = PETSC_FALSE; 5393 if (pcbddc->NullSpace_corr[0]) { 5394 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5395 } 5396 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5397 check_corr = PETSC_TRUE; 5398 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5399 } 5400 if (neumann && pcbddc->NullSpace_corr[2]) { 5401 check_corr = PETSC_TRUE; 5402 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5403 } 5404 /* check Dirichlet and Neumann solvers */ 5405 if (pcbddc->dbg_flag) { 5406 if (dirichlet) { /* Dirichlet */ 5407 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5408 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5409 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5410 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5411 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5413 if (check_corr) { 5414 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5415 } 5416 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5417 } 5418 if (neumann) { /* Neumann */ 5419 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5420 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5421 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5422 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5423 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5425 if (check_corr) { 5426 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5427 } 5428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5429 } 5430 } 5431 /* free Neumann problem's matrix */ 5432 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5433 PetscFunctionReturn(0); 5434 } 5435 5436 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5437 { 5438 PetscErrorCode ierr; 5439 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5440 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5441 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5442 5443 PetscFunctionBegin; 5444 if (!reuse_solver) { 5445 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5446 } 5447 if (!pcbddc->switch_static) { 5448 if (applytranspose && pcbddc->local_auxmat1) { 5449 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5450 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5451 } 5452 if (!reuse_solver) { 5453 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5454 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5455 } else { 5456 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5457 5458 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5459 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5460 } 5461 } else { 5462 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5463 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5464 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5465 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5466 if (applytranspose && pcbddc->local_auxmat1) { 5467 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5468 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5469 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5470 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5471 } 5472 } 5473 if (!reuse_solver || pcbddc->switch_static) { 5474 if (applytranspose) { 5475 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5476 } else { 5477 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5478 } 5479 } else { 5480 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5481 5482 if (applytranspose) { 5483 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5484 } else { 5485 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5486 } 5487 } 5488 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5489 if (!pcbddc->switch_static) { 5490 if (!reuse_solver) { 5491 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5492 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5493 } else { 5494 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5495 5496 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5498 } 5499 if (!applytranspose && pcbddc->local_auxmat1) { 5500 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5501 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5502 } 5503 } else { 5504 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5505 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5506 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5507 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5508 if (!applytranspose && pcbddc->local_auxmat1) { 5509 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5510 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5511 } 5512 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5513 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5514 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5515 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5516 } 5517 PetscFunctionReturn(0); 5518 } 5519 5520 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5521 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5522 { 5523 PetscErrorCode ierr; 5524 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5525 PC_IS* pcis = (PC_IS*) (pc->data); 5526 const PetscScalar zero = 0.0; 5527 5528 PetscFunctionBegin; 5529 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5530 if (!pcbddc->benign_apply_coarse_only) { 5531 if (applytranspose) { 5532 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5533 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5534 } else { 5535 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5536 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5537 } 5538 } else { 5539 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5540 } 5541 5542 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5543 if (pcbddc->benign_n) { 5544 PetscScalar *array; 5545 PetscInt j; 5546 5547 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5548 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5549 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5550 } 5551 5552 /* start communications from local primal nodes to rhs of coarse solver */ 5553 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5554 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5555 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5556 5557 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5558 if (pcbddc->coarse_ksp) { 5559 Mat coarse_mat; 5560 Vec rhs,sol; 5561 MatNullSpace nullsp; 5562 PetscBool isbddc = PETSC_FALSE; 5563 5564 if (pcbddc->benign_have_null) { 5565 PC coarse_pc; 5566 5567 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5568 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5569 /* we need to propagate to coarser levels the need for a possible benign correction */ 5570 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5571 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5572 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5573 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5574 } 5575 } 5576 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5577 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5578 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5579 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5580 if (nullsp) { 5581 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5582 } 5583 if (applytranspose) { 5584 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5585 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5586 } else { 5587 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5588 PC coarse_pc; 5589 5590 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5591 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5592 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5593 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5594 } else { 5595 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5596 } 5597 } 5598 /* we don't need the benign correction at coarser levels anymore */ 5599 if (pcbddc->benign_have_null && isbddc) { 5600 PC coarse_pc; 5601 PC_BDDC* coarsepcbddc; 5602 5603 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5604 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5605 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5606 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5607 } 5608 if (nullsp) { 5609 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5610 } 5611 } 5612 5613 /* Local solution on R nodes */ 5614 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5615 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5616 } 5617 /* communications from coarse sol to local primal nodes */ 5618 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5619 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5620 5621 /* Sum contributions from the two levels */ 5622 if (!pcbddc->benign_apply_coarse_only) { 5623 if (applytranspose) { 5624 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5625 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5626 } else { 5627 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5628 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5629 } 5630 /* store p0 */ 5631 if (pcbddc->benign_n) { 5632 PetscScalar *array; 5633 PetscInt j; 5634 5635 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5636 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5637 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5638 } 5639 } else { /* expand the coarse solution */ 5640 if (applytranspose) { 5641 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5642 } else { 5643 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5644 } 5645 } 5646 PetscFunctionReturn(0); 5647 } 5648 5649 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5650 { 5651 PetscErrorCode ierr; 5652 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5653 PetscScalar *array; 5654 Vec from,to; 5655 5656 PetscFunctionBegin; 5657 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5658 from = pcbddc->coarse_vec; 5659 to = pcbddc->vec1_P; 5660 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5661 Vec tvec; 5662 5663 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5664 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5665 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5666 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5667 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5668 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5669 } 5670 } else { /* from local to global -> put data in coarse right hand side */ 5671 from = pcbddc->vec1_P; 5672 to = pcbddc->coarse_vec; 5673 } 5674 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5675 PetscFunctionReturn(0); 5676 } 5677 5678 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5679 { 5680 PetscErrorCode ierr; 5681 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5682 PetscScalar *array; 5683 Vec from,to; 5684 5685 PetscFunctionBegin; 5686 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5687 from = pcbddc->coarse_vec; 5688 to = pcbddc->vec1_P; 5689 } else { /* from local to global -> put data in coarse right hand side */ 5690 from = pcbddc->vec1_P; 5691 to = pcbddc->coarse_vec; 5692 } 5693 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5694 if (smode == SCATTER_FORWARD) { 5695 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5696 Vec tvec; 5697 5698 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5699 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5700 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5701 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5702 } 5703 } else { 5704 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5705 ierr = VecResetArray(from);CHKERRQ(ierr); 5706 } 5707 } 5708 PetscFunctionReturn(0); 5709 } 5710 5711 /* uncomment for testing purposes */ 5712 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5713 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5714 { 5715 PetscErrorCode ierr; 5716 PC_IS* pcis = (PC_IS*)(pc->data); 5717 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5718 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5719 /* one and zero */ 5720 PetscScalar one=1.0,zero=0.0; 5721 /* space to store constraints and their local indices */ 5722 PetscScalar *constraints_data; 5723 PetscInt *constraints_idxs,*constraints_idxs_B; 5724 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5725 PetscInt *constraints_n; 5726 /* iterators */ 5727 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5728 /* BLAS integers */ 5729 PetscBLASInt lwork,lierr; 5730 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5731 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5732 /* reuse */ 5733 PetscInt olocal_primal_size,olocal_primal_size_cc; 5734 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5735 /* change of basis */ 5736 PetscBool qr_needed; 5737 PetscBT change_basis,qr_needed_idx; 5738 /* auxiliary stuff */ 5739 PetscInt *nnz,*is_indices; 5740 PetscInt ncc; 5741 /* some quantities */ 5742 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5743 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5744 PetscReal tol; /* tolerance for retaining eigenmodes */ 5745 5746 PetscFunctionBegin; 5747 tol = PetscSqrtReal(PETSC_SMALL); 5748 /* Destroy Mat objects computed previously */ 5749 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5750 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5751 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5752 /* save info on constraints from previous setup (if any) */ 5753 olocal_primal_size = pcbddc->local_primal_size; 5754 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5755 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5756 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5757 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5758 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5759 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5760 5761 if (!pcbddc->adaptive_selection) { 5762 IS ISForVertices,*ISForFaces,*ISForEdges; 5763 MatNullSpace nearnullsp; 5764 const Vec *nearnullvecs; 5765 Vec *localnearnullsp; 5766 PetscScalar *array; 5767 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5768 PetscBool nnsp_has_cnst; 5769 /* LAPACK working arrays for SVD or POD */ 5770 PetscBool skip_lapack,boolforchange; 5771 PetscScalar *work; 5772 PetscReal *singular_vals; 5773 #if defined(PETSC_USE_COMPLEX) 5774 PetscReal *rwork; 5775 #endif 5776 #if defined(PETSC_MISSING_LAPACK_GESVD) 5777 PetscScalar *temp_basis,*correlation_mat; 5778 #else 5779 PetscBLASInt dummy_int=1; 5780 PetscScalar dummy_scalar=1.; 5781 #endif 5782 5783 /* Get index sets for faces, edges and vertices from graph */ 5784 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5785 /* print some info */ 5786 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5787 PetscInt nv; 5788 5789 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5790 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5791 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5792 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5797 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5798 } 5799 5800 /* free unneeded index sets */ 5801 if (!pcbddc->use_vertices) { 5802 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5803 } 5804 if (!pcbddc->use_edges) { 5805 for (i=0;i<n_ISForEdges;i++) { 5806 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5807 } 5808 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5809 n_ISForEdges = 0; 5810 } 5811 if (!pcbddc->use_faces) { 5812 for (i=0;i<n_ISForFaces;i++) { 5813 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5814 } 5815 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5816 n_ISForFaces = 0; 5817 } 5818 5819 /* check if near null space is attached to global mat */ 5820 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5821 if (nearnullsp) { 5822 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5823 /* remove any stored info */ 5824 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5825 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5826 /* store information for BDDC solver reuse */ 5827 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5828 pcbddc->onearnullspace = nearnullsp; 5829 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5830 for (i=0;i<nnsp_size;i++) { 5831 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5832 } 5833 } else { /* if near null space is not provided BDDC uses constants by default */ 5834 nnsp_size = 0; 5835 nnsp_has_cnst = PETSC_TRUE; 5836 } 5837 /* get max number of constraints on a single cc */ 5838 max_constraints = nnsp_size; 5839 if (nnsp_has_cnst) max_constraints++; 5840 5841 /* 5842 Evaluate maximum storage size needed by the procedure 5843 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5844 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5845 There can be multiple constraints per connected component 5846 */ 5847 n_vertices = 0; 5848 if (ISForVertices) { 5849 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5850 } 5851 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5852 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5853 5854 total_counts = n_ISForFaces+n_ISForEdges; 5855 total_counts *= max_constraints; 5856 total_counts += n_vertices; 5857 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5858 5859 total_counts = 0; 5860 max_size_of_constraint = 0; 5861 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5862 IS used_is; 5863 if (i<n_ISForEdges) { 5864 used_is = ISForEdges[i]; 5865 } else { 5866 used_is = ISForFaces[i-n_ISForEdges]; 5867 } 5868 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5869 total_counts += j; 5870 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5871 } 5872 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5873 5874 /* get local part of global near null space vectors */ 5875 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5876 for (k=0;k<nnsp_size;k++) { 5877 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5878 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5879 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5880 } 5881 5882 /* whether or not to skip lapack calls */ 5883 skip_lapack = PETSC_TRUE; 5884 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5885 5886 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5887 if (!skip_lapack) { 5888 PetscScalar temp_work; 5889 5890 #if defined(PETSC_MISSING_LAPACK_GESVD) 5891 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5892 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5893 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5894 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5895 #if defined(PETSC_USE_COMPLEX) 5896 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5897 #endif 5898 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5899 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5900 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5901 lwork = -1; 5902 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5903 #if !defined(PETSC_USE_COMPLEX) 5904 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5905 #else 5906 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5907 #endif 5908 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5909 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5910 #else /* on missing GESVD */ 5911 /* SVD */ 5912 PetscInt max_n,min_n; 5913 max_n = max_size_of_constraint; 5914 min_n = max_constraints; 5915 if (max_size_of_constraint < max_constraints) { 5916 min_n = max_size_of_constraint; 5917 max_n = max_constraints; 5918 } 5919 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5920 #if defined(PETSC_USE_COMPLEX) 5921 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5922 #endif 5923 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5924 lwork = -1; 5925 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5926 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5927 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5928 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5929 #if !defined(PETSC_USE_COMPLEX) 5930 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5931 #else 5932 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5933 #endif 5934 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5935 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5936 #endif /* on missing GESVD */ 5937 /* Allocate optimal workspace */ 5938 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5939 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5940 } 5941 /* Now we can loop on constraining sets */ 5942 total_counts = 0; 5943 constraints_idxs_ptr[0] = 0; 5944 constraints_data_ptr[0] = 0; 5945 /* vertices */ 5946 if (n_vertices) { 5947 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5948 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5949 for (i=0;i<n_vertices;i++) { 5950 constraints_n[total_counts] = 1; 5951 constraints_data[total_counts] = 1.0; 5952 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5953 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5954 total_counts++; 5955 } 5956 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5957 n_vertices = total_counts; 5958 } 5959 5960 /* edges and faces */ 5961 total_counts_cc = total_counts; 5962 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5963 IS used_is; 5964 PetscBool idxs_copied = PETSC_FALSE; 5965 5966 if (ncc<n_ISForEdges) { 5967 used_is = ISForEdges[ncc]; 5968 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5969 } else { 5970 used_is = ISForFaces[ncc-n_ISForEdges]; 5971 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5972 } 5973 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5974 5975 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5976 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5977 /* change of basis should not be performed on local periodic nodes */ 5978 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5979 if (nnsp_has_cnst) { 5980 PetscScalar quad_value; 5981 5982 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5983 idxs_copied = PETSC_TRUE; 5984 5985 if (!pcbddc->use_nnsp_true) { 5986 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5987 } else { 5988 quad_value = 1.0; 5989 } 5990 for (j=0;j<size_of_constraint;j++) { 5991 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5992 } 5993 temp_constraints++; 5994 total_counts++; 5995 } 5996 for (k=0;k<nnsp_size;k++) { 5997 PetscReal real_value; 5998 PetscScalar *ptr_to_data; 5999 6000 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6001 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6002 for (j=0;j<size_of_constraint;j++) { 6003 ptr_to_data[j] = array[is_indices[j]]; 6004 } 6005 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6006 /* check if array is null on the connected component */ 6007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6008 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6009 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6010 temp_constraints++; 6011 total_counts++; 6012 if (!idxs_copied) { 6013 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6014 idxs_copied = PETSC_TRUE; 6015 } 6016 } 6017 } 6018 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6019 valid_constraints = temp_constraints; 6020 if (!pcbddc->use_nnsp_true && temp_constraints) { 6021 if (temp_constraints == 1) { /* just normalize the constraint */ 6022 PetscScalar norm,*ptr_to_data; 6023 6024 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6025 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6026 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6027 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6028 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6029 } else { /* perform SVD */ 6030 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6031 6032 #if defined(PETSC_MISSING_LAPACK_GESVD) 6033 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6034 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6035 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6036 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6037 from that computed using LAPACKgesvd 6038 -> This is due to a different computation of eigenvectors in LAPACKheev 6039 -> The quality of the POD-computed basis will be the same */ 6040 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6041 /* Store upper triangular part of correlation matrix */ 6042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6043 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6044 for (j=0;j<temp_constraints;j++) { 6045 for (k=0;k<j+1;k++) { 6046 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6047 } 6048 } 6049 /* compute eigenvalues and eigenvectors of correlation matrix */ 6050 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6051 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6052 #if !defined(PETSC_USE_COMPLEX) 6053 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6054 #else 6055 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6056 #endif 6057 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6058 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6059 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6060 j = 0; 6061 while (j < temp_constraints && singular_vals[j] < tol) j++; 6062 total_counts = total_counts-j; 6063 valid_constraints = temp_constraints-j; 6064 /* scale and copy POD basis into used quadrature memory */ 6065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6066 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6067 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6069 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6071 if (j<temp_constraints) { 6072 PetscInt ii; 6073 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6074 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6075 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6077 for (k=0;k<temp_constraints-j;k++) { 6078 for (ii=0;ii<size_of_constraint;ii++) { 6079 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6080 } 6081 } 6082 } 6083 #else /* on missing GESVD */ 6084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6085 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6087 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6088 #if !defined(PETSC_USE_COMPLEX) 6089 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6090 #else 6091 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6092 #endif 6093 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6094 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6095 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6096 k = temp_constraints; 6097 if (k > size_of_constraint) k = size_of_constraint; 6098 j = 0; 6099 while (j < k && singular_vals[k-j-1] < tol) j++; 6100 valid_constraints = k-j; 6101 total_counts = total_counts-temp_constraints+valid_constraints; 6102 #endif /* on missing GESVD */ 6103 } 6104 } 6105 /* update pointers information */ 6106 if (valid_constraints) { 6107 constraints_n[total_counts_cc] = valid_constraints; 6108 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6109 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6110 /* set change_of_basis flag */ 6111 if (boolforchange) { 6112 PetscBTSet(change_basis,total_counts_cc); 6113 } 6114 total_counts_cc++; 6115 } 6116 } 6117 /* free workspace */ 6118 if (!skip_lapack) { 6119 ierr = PetscFree(work);CHKERRQ(ierr); 6120 #if defined(PETSC_USE_COMPLEX) 6121 ierr = PetscFree(rwork);CHKERRQ(ierr); 6122 #endif 6123 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6124 #if defined(PETSC_MISSING_LAPACK_GESVD) 6125 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6126 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6127 #endif 6128 } 6129 for (k=0;k<nnsp_size;k++) { 6130 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6131 } 6132 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6133 /* free index sets of faces, edges and vertices */ 6134 for (i=0;i<n_ISForFaces;i++) { 6135 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6136 } 6137 if (n_ISForFaces) { 6138 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6139 } 6140 for (i=0;i<n_ISForEdges;i++) { 6141 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6142 } 6143 if (n_ISForEdges) { 6144 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6145 } 6146 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6147 } else { 6148 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6149 6150 total_counts = 0; 6151 n_vertices = 0; 6152 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6153 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6154 } 6155 max_constraints = 0; 6156 total_counts_cc = 0; 6157 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6158 total_counts += pcbddc->adaptive_constraints_n[i]; 6159 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6160 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6161 } 6162 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6163 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6164 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6165 constraints_data = pcbddc->adaptive_constraints_data; 6166 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6167 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6168 total_counts_cc = 0; 6169 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6170 if (pcbddc->adaptive_constraints_n[i]) { 6171 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6172 } 6173 } 6174 #if 0 6175 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6176 for (i=0;i<total_counts_cc;i++) { 6177 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6178 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6179 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6180 printf(" %d",constraints_idxs[j]); 6181 } 6182 printf("\n"); 6183 printf("number of cc: %d\n",constraints_n[i]); 6184 } 6185 for (i=0;i<n_vertices;i++) { 6186 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6187 } 6188 for (i=0;i<sub_schurs->n_subs;i++) { 6189 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6190 } 6191 #endif 6192 6193 max_size_of_constraint = 0; 6194 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6195 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6196 /* Change of basis */ 6197 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6198 if (pcbddc->use_change_of_basis) { 6199 for (i=0;i<sub_schurs->n_subs;i++) { 6200 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6201 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6202 } 6203 } 6204 } 6205 } 6206 pcbddc->local_primal_size = total_counts; 6207 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6208 6209 /* map constraints_idxs in boundary numbering */ 6210 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6211 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6212 6213 /* Create constraint matrix */ 6214 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6215 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6216 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6217 6218 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6219 /* determine if a QR strategy is needed for change of basis */ 6220 qr_needed = PETSC_FALSE; 6221 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6222 total_primal_vertices=0; 6223 pcbddc->local_primal_size_cc = 0; 6224 for (i=0;i<total_counts_cc;i++) { 6225 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6226 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6227 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6228 pcbddc->local_primal_size_cc += 1; 6229 } else if (PetscBTLookup(change_basis,i)) { 6230 for (k=0;k<constraints_n[i];k++) { 6231 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6232 } 6233 pcbddc->local_primal_size_cc += constraints_n[i]; 6234 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6235 PetscBTSet(qr_needed_idx,i); 6236 qr_needed = PETSC_TRUE; 6237 } 6238 } else { 6239 pcbddc->local_primal_size_cc += 1; 6240 } 6241 } 6242 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6243 pcbddc->n_vertices = total_primal_vertices; 6244 /* permute indices in order to have a sorted set of vertices */ 6245 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6246 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6247 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6248 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6249 6250 /* nonzero structure of constraint matrix */ 6251 /* and get reference dof for local constraints */ 6252 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6253 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6254 6255 j = total_primal_vertices; 6256 total_counts = total_primal_vertices; 6257 cum = total_primal_vertices; 6258 for (i=n_vertices;i<total_counts_cc;i++) { 6259 if (!PetscBTLookup(change_basis,i)) { 6260 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6261 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6262 cum++; 6263 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6264 for (k=0;k<constraints_n[i];k++) { 6265 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6266 nnz[j+k] = size_of_constraint; 6267 } 6268 j += constraints_n[i]; 6269 } 6270 } 6271 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6272 ierr = PetscFree(nnz);CHKERRQ(ierr); 6273 6274 /* set values in constraint matrix */ 6275 for (i=0;i<total_primal_vertices;i++) { 6276 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6277 } 6278 total_counts = total_primal_vertices; 6279 for (i=n_vertices;i<total_counts_cc;i++) { 6280 if (!PetscBTLookup(change_basis,i)) { 6281 PetscInt *cols; 6282 6283 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6284 cols = constraints_idxs+constraints_idxs_ptr[i]; 6285 for (k=0;k<constraints_n[i];k++) { 6286 PetscInt row = total_counts+k; 6287 PetscScalar *vals; 6288 6289 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6290 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6291 } 6292 total_counts += constraints_n[i]; 6293 } 6294 } 6295 /* assembling */ 6296 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6297 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6298 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6299 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6300 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6301 6302 /* 6303 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6304 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6305 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6306 */ 6307 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6308 if (pcbddc->use_change_of_basis) { 6309 /* dual and primal dofs on a single cc */ 6310 PetscInt dual_dofs,primal_dofs; 6311 /* working stuff for GEQRF */ 6312 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6313 PetscBLASInt lqr_work; 6314 /* working stuff for UNGQR */ 6315 PetscScalar *gqr_work,lgqr_work_t; 6316 PetscBLASInt lgqr_work; 6317 /* working stuff for TRTRS */ 6318 PetscScalar *trs_rhs; 6319 PetscBLASInt Blas_NRHS; 6320 /* pointers for values insertion into change of basis matrix */ 6321 PetscInt *start_rows,*start_cols; 6322 PetscScalar *start_vals; 6323 /* working stuff for values insertion */ 6324 PetscBT is_primal; 6325 PetscInt *aux_primal_numbering_B; 6326 /* matrix sizes */ 6327 PetscInt global_size,local_size; 6328 /* temporary change of basis */ 6329 Mat localChangeOfBasisMatrix; 6330 /* extra space for debugging */ 6331 PetscScalar *dbg_work; 6332 6333 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6334 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6335 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6336 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6337 /* nonzeros for local mat */ 6338 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6339 if (!pcbddc->benign_change || pcbddc->fake_change) { 6340 for (i=0;i<pcis->n;i++) nnz[i]=1; 6341 } else { 6342 const PetscInt *ii; 6343 PetscInt n; 6344 PetscBool flg_row; 6345 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6346 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6347 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6348 } 6349 for (i=n_vertices;i<total_counts_cc;i++) { 6350 if (PetscBTLookup(change_basis,i)) { 6351 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6352 if (PetscBTLookup(qr_needed_idx,i)) { 6353 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6354 } else { 6355 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6356 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6357 } 6358 } 6359 } 6360 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6361 ierr = PetscFree(nnz);CHKERRQ(ierr); 6362 /* Set interior change in the matrix */ 6363 if (!pcbddc->benign_change || pcbddc->fake_change) { 6364 for (i=0;i<pcis->n;i++) { 6365 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6366 } 6367 } else { 6368 const PetscInt *ii,*jj; 6369 PetscScalar *aa; 6370 PetscInt n; 6371 PetscBool flg_row; 6372 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6373 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6374 for (i=0;i<n;i++) { 6375 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6376 } 6377 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6378 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6379 } 6380 6381 if (pcbddc->dbg_flag) { 6382 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6384 } 6385 6386 6387 /* Now we loop on the constraints which need a change of basis */ 6388 /* 6389 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6390 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6391 6392 Basic blocks of change of basis matrix T computed by 6393 6394 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6395 6396 | 1 0 ... 0 s_1/S | 6397 | 0 1 ... 0 s_2/S | 6398 | ... | 6399 | 0 ... 1 s_{n-1}/S | 6400 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6401 6402 with S = \sum_{i=1}^n s_i^2 6403 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6404 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6405 6406 - QR decomposition of constraints otherwise 6407 */ 6408 if (qr_needed) { 6409 /* space to store Q */ 6410 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6411 /* array to store scaling factors for reflectors */ 6412 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6413 /* first we issue queries for optimal work */ 6414 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6415 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6416 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6417 lqr_work = -1; 6418 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6419 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6420 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6421 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6422 lgqr_work = -1; 6423 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6424 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6425 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6426 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6427 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6428 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6430 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6431 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6432 /* array to store rhs and solution of triangular solver */ 6433 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6434 /* allocating workspace for check */ 6435 if (pcbddc->dbg_flag) { 6436 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6437 } 6438 } 6439 /* array to store whether a node is primal or not */ 6440 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6441 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6442 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6443 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6444 for (i=0;i<total_primal_vertices;i++) { 6445 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6446 } 6447 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6448 6449 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6450 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6451 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6452 if (PetscBTLookup(change_basis,total_counts)) { 6453 /* get constraint info */ 6454 primal_dofs = constraints_n[total_counts]; 6455 dual_dofs = size_of_constraint-primal_dofs; 6456 6457 if (pcbddc->dbg_flag) { 6458 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6459 } 6460 6461 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6462 6463 /* copy quadrature constraints for change of basis check */ 6464 if (pcbddc->dbg_flag) { 6465 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6466 } 6467 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6468 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6469 6470 /* compute QR decomposition of constraints */ 6471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6472 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6475 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6478 6479 /* explictly compute R^-T */ 6480 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6481 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6482 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6483 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6485 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6486 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6487 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6488 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6490 6491 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6494 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6495 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6496 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6497 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6498 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6499 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6500 6501 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6502 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6503 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6505 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6506 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6508 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6510 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6511 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6512 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6513 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6514 6515 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6516 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6517 /* insert cols for primal dofs */ 6518 for (j=0;j<primal_dofs;j++) { 6519 start_vals = &qr_basis[j*size_of_constraint]; 6520 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6521 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6522 } 6523 /* insert cols for dual dofs */ 6524 for (j=0,k=0;j<dual_dofs;k++) { 6525 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6526 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6527 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6528 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6529 j++; 6530 } 6531 } 6532 6533 /* check change of basis */ 6534 if (pcbddc->dbg_flag) { 6535 PetscInt ii,jj; 6536 PetscBool valid_qr=PETSC_TRUE; 6537 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6538 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6542 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6544 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6546 for (jj=0;jj<size_of_constraint;jj++) { 6547 for (ii=0;ii<primal_dofs;ii++) { 6548 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6549 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6550 } 6551 } 6552 if (!valid_qr) { 6553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6554 for (jj=0;jj<size_of_constraint;jj++) { 6555 for (ii=0;ii<primal_dofs;ii++) { 6556 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6557 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6558 } 6559 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6560 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6561 } 6562 } 6563 } 6564 } else { 6565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6566 } 6567 } 6568 } else { /* simple transformation block */ 6569 PetscInt row,col; 6570 PetscScalar val,norm; 6571 6572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6573 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6574 for (j=0;j<size_of_constraint;j++) { 6575 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6576 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6577 if (!PetscBTLookup(is_primal,row_B)) { 6578 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6579 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6580 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6581 } else { 6582 for (k=0;k<size_of_constraint;k++) { 6583 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6584 if (row != col) { 6585 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6586 } else { 6587 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6588 } 6589 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6590 } 6591 } 6592 } 6593 if (pcbddc->dbg_flag) { 6594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6595 } 6596 } 6597 } else { 6598 if (pcbddc->dbg_flag) { 6599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6600 } 6601 } 6602 } 6603 6604 /* free workspace */ 6605 if (qr_needed) { 6606 if (pcbddc->dbg_flag) { 6607 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6608 } 6609 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6610 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6611 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6612 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6613 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6614 } 6615 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6616 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 6619 /* assembling of global change of variable */ 6620 if (!pcbddc->fake_change) { 6621 Mat tmat; 6622 PetscInt bs; 6623 6624 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6625 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6626 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6627 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6628 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6629 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6630 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6631 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6632 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6633 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6634 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6635 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6637 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6640 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6641 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6642 6643 /* check */ 6644 if (pcbddc->dbg_flag) { 6645 PetscReal error; 6646 Vec x,x_change; 6647 6648 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6649 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6650 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6651 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6652 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6653 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6654 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6655 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6656 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6657 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6658 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6659 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6660 if (error > PETSC_SMALL) { 6661 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6662 } 6663 ierr = VecDestroy(&x);CHKERRQ(ierr); 6664 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6665 } 6666 /* adapt sub_schurs computed (if any) */ 6667 if (pcbddc->use_deluxe_scaling) { 6668 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6669 6670 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6671 if (sub_schurs && sub_schurs->S_Ej_all) { 6672 Mat S_new,tmat; 6673 IS is_all_N,is_V_Sall = NULL; 6674 6675 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6676 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6677 if (pcbddc->deluxe_zerorows) { 6678 ISLocalToGlobalMapping NtoSall; 6679 IS is_V; 6680 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6681 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6682 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6683 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6684 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6685 } 6686 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6687 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6688 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6689 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6690 if (pcbddc->deluxe_zerorows) { 6691 const PetscScalar *array; 6692 const PetscInt *idxs_V,*idxs_all; 6693 PetscInt i,n_V; 6694 6695 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6696 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6697 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6698 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6699 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6700 for (i=0;i<n_V;i++) { 6701 PetscScalar val; 6702 PetscInt idx; 6703 6704 idx = idxs_V[i]; 6705 val = array[idxs_all[idxs_V[i]]]; 6706 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6707 } 6708 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6709 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6710 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6711 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6712 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6713 } 6714 sub_schurs->S_Ej_all = S_new; 6715 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6716 if (sub_schurs->sum_S_Ej_all) { 6717 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6718 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6719 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6720 if (pcbddc->deluxe_zerorows) { 6721 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6722 } 6723 sub_schurs->sum_S_Ej_all = S_new; 6724 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6725 } 6726 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6727 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6728 } 6729 /* destroy any change of basis context in sub_schurs */ 6730 if (sub_schurs && sub_schurs->change) { 6731 PetscInt i; 6732 6733 for (i=0;i<sub_schurs->n_subs;i++) { 6734 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6735 } 6736 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6737 } 6738 } 6739 if (pcbddc->switch_static) { /* need to save the local change */ 6740 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6741 } else { 6742 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6743 } 6744 /* determine if any process has changed the pressures locally */ 6745 pcbddc->change_interior = pcbddc->benign_have_null; 6746 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6747 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6748 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6749 pcbddc->use_qr_single = qr_needed; 6750 } 6751 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6752 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6753 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6754 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6755 } else { 6756 Mat benign_global = NULL; 6757 if (pcbddc->benign_have_null) { 6758 Mat tmat; 6759 6760 pcbddc->change_interior = PETSC_TRUE; 6761 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6762 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6763 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6764 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6765 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6766 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6767 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6768 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6769 if (pcbddc->benign_change) { 6770 Mat M; 6771 6772 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6773 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6774 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6775 ierr = MatDestroy(&M);CHKERRQ(ierr); 6776 } else { 6777 Mat eye; 6778 PetscScalar *array; 6779 6780 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6781 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6782 for (i=0;i<pcis->n;i++) { 6783 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6784 } 6785 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6786 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6787 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6788 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6789 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6790 } 6791 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6792 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6793 } 6794 if (pcbddc->user_ChangeOfBasisMatrix) { 6795 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6796 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6797 } else if (pcbddc->benign_have_null) { 6798 pcbddc->ChangeOfBasisMatrix = benign_global; 6799 } 6800 } 6801 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6802 IS is_global; 6803 const PetscInt *gidxs; 6804 6805 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6806 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6807 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6808 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6809 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6810 } 6811 } 6812 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6813 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6814 } 6815 6816 if (!pcbddc->fake_change) { 6817 /* add pressure dofs to set of primal nodes for numbering purposes */ 6818 for (i=0;i<pcbddc->benign_n;i++) { 6819 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6820 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6821 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6822 pcbddc->local_primal_size_cc++; 6823 pcbddc->local_primal_size++; 6824 } 6825 6826 /* check if a new primal space has been introduced (also take into account benign trick) */ 6827 pcbddc->new_primal_space_local = PETSC_TRUE; 6828 if (olocal_primal_size == pcbddc->local_primal_size) { 6829 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6830 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6831 if (!pcbddc->new_primal_space_local) { 6832 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6833 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6834 } 6835 } 6836 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6837 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6838 } 6839 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6840 6841 /* flush dbg viewer */ 6842 if (pcbddc->dbg_flag) { 6843 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6844 } 6845 6846 /* free workspace */ 6847 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6848 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6849 if (!pcbddc->adaptive_selection) { 6850 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6851 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6852 } else { 6853 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6854 pcbddc->adaptive_constraints_idxs_ptr, 6855 pcbddc->adaptive_constraints_data_ptr, 6856 pcbddc->adaptive_constraints_idxs, 6857 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6858 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6859 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6860 } 6861 PetscFunctionReturn(0); 6862 } 6863 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6864 6865 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6866 { 6867 ISLocalToGlobalMapping map; 6868 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6869 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6870 PetscInt i,N; 6871 PetscBool rcsr = PETSC_FALSE; 6872 PetscErrorCode ierr; 6873 6874 PetscFunctionBegin; 6875 if (pcbddc->recompute_topography) { 6876 pcbddc->graphanalyzed = PETSC_FALSE; 6877 /* Reset previously computed graph */ 6878 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6879 /* Init local Graph struct */ 6880 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6881 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6882 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6883 6884 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6885 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6886 } 6887 /* Check validity of the csr graph passed in by the user */ 6888 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6889 6890 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6891 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6892 PetscInt *xadj,*adjncy; 6893 PetscInt nvtxs; 6894 PetscBool flg_row=PETSC_FALSE; 6895 6896 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6897 if (flg_row) { 6898 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6899 pcbddc->computed_rowadj = PETSC_TRUE; 6900 } 6901 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6902 rcsr = PETSC_TRUE; 6903 } 6904 if (pcbddc->dbg_flag) { 6905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6906 } 6907 6908 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6909 PetscReal *lcoords; 6910 PetscInt n; 6911 MPI_Datatype dimrealtype; 6912 6913 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6914 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6915 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6916 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6917 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6918 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6919 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6920 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6921 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6922 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6923 6924 pcbddc->mat_graph->coords = lcoords; 6925 pcbddc->mat_graph->cloc = PETSC_TRUE; 6926 pcbddc->mat_graph->cnloc = n; 6927 } 6928 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6929 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6930 6931 /* Setup of Graph */ 6932 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6933 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6934 6935 /* attach info on disconnected subdomains if present */ 6936 if (pcbddc->n_local_subs) { 6937 PetscInt *local_subs; 6938 6939 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6940 for (i=0;i<pcbddc->n_local_subs;i++) { 6941 const PetscInt *idxs; 6942 PetscInt nl,j; 6943 6944 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6945 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6946 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6947 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6948 } 6949 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6950 pcbddc->mat_graph->local_subs = local_subs; 6951 } 6952 } 6953 6954 if (!pcbddc->graphanalyzed) { 6955 /* Graph's connected components analysis */ 6956 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6957 pcbddc->graphanalyzed = PETSC_TRUE; 6958 } 6959 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6960 PetscFunctionReturn(0); 6961 } 6962 6963 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6964 { 6965 PetscInt i,j; 6966 PetscScalar *alphas; 6967 PetscErrorCode ierr; 6968 6969 PetscFunctionBegin; 6970 if (!n) PetscFunctionReturn(0); 6971 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6972 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6973 for (i=1;i<n;i++) { 6974 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6975 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6976 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6977 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6978 } 6979 ierr = PetscFree(alphas);CHKERRQ(ierr); 6980 PetscFunctionReturn(0); 6981 } 6982 6983 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6984 { 6985 Mat A; 6986 PetscInt n_neighs,*neighs,*n_shared,**shared; 6987 PetscMPIInt size,rank,color; 6988 PetscInt *xadj,*adjncy; 6989 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6990 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6991 PetscInt void_procs,*procs_candidates = NULL; 6992 PetscInt xadj_count,*count; 6993 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6994 PetscSubcomm psubcomm; 6995 MPI_Comm subcomm; 6996 PetscErrorCode ierr; 6997 6998 PetscFunctionBegin; 6999 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7000 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7001 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7002 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7003 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7004 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 7005 7006 if (have_void) *have_void = PETSC_FALSE; 7007 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7008 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7009 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7010 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7011 im_active = !!n; 7012 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7013 void_procs = size - active_procs; 7014 /* get ranks of of non-active processes in mat communicator */ 7015 if (void_procs) { 7016 PetscInt ncand; 7017 7018 if (have_void) *have_void = PETSC_TRUE; 7019 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7020 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7021 for (i=0,ncand=0;i<size;i++) { 7022 if (!procs_candidates[i]) { 7023 procs_candidates[ncand++] = i; 7024 } 7025 } 7026 /* force n_subdomains to be not greater that the number of non-active processes */ 7027 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7028 } 7029 7030 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7031 number of subdomains requested 1 -> send to master or first candidate in voids */ 7032 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7033 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7034 PetscInt issize,isidx,dest; 7035 if (*n_subdomains == 1) dest = 0; 7036 else dest = rank; 7037 if (im_active) { 7038 issize = 1; 7039 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7040 isidx = procs_candidates[dest]; 7041 } else { 7042 isidx = dest; 7043 } 7044 } else { 7045 issize = 0; 7046 isidx = -1; 7047 } 7048 if (*n_subdomains != 1) *n_subdomains = active_procs; 7049 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7050 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7051 PetscFunctionReturn(0); 7052 } 7053 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7054 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7055 threshold = PetscMax(threshold,2); 7056 7057 /* Get info on mapping */ 7058 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7059 7060 /* build local CSR graph of subdomains' connectivity */ 7061 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7062 xadj[0] = 0; 7063 xadj[1] = PetscMax(n_neighs-1,0); 7064 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7065 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7066 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7067 for (i=1;i<n_neighs;i++) 7068 for (j=0;j<n_shared[i];j++) 7069 count[shared[i][j]] += 1; 7070 7071 xadj_count = 0; 7072 for (i=1;i<n_neighs;i++) { 7073 for (j=0;j<n_shared[i];j++) { 7074 if (count[shared[i][j]] < threshold) { 7075 adjncy[xadj_count] = neighs[i]; 7076 adjncy_wgt[xadj_count] = n_shared[i]; 7077 xadj_count++; 7078 break; 7079 } 7080 } 7081 } 7082 xadj[1] = xadj_count; 7083 ierr = PetscFree(count);CHKERRQ(ierr); 7084 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7085 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7086 7087 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7088 7089 /* Restrict work on active processes only */ 7090 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7091 if (void_procs) { 7092 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7093 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7094 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7095 subcomm = PetscSubcommChild(psubcomm); 7096 } else { 7097 psubcomm = NULL; 7098 subcomm = PetscObjectComm((PetscObject)mat); 7099 } 7100 7101 v_wgt = NULL; 7102 if (!color) { 7103 ierr = PetscFree(xadj);CHKERRQ(ierr); 7104 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7105 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7106 } else { 7107 Mat subdomain_adj; 7108 IS new_ranks,new_ranks_contig; 7109 MatPartitioning partitioner; 7110 PetscInt rstart=0,rend=0; 7111 PetscInt *is_indices,*oldranks; 7112 PetscMPIInt size; 7113 PetscBool aggregate; 7114 7115 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7116 if (void_procs) { 7117 PetscInt prank = rank; 7118 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7119 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7120 for (i=0;i<xadj[1];i++) { 7121 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7122 } 7123 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7124 } else { 7125 oldranks = NULL; 7126 } 7127 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7128 if (aggregate) { /* TODO: all this part could be made more efficient */ 7129 PetscInt lrows,row,ncols,*cols; 7130 PetscMPIInt nrank; 7131 PetscScalar *vals; 7132 7133 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7134 lrows = 0; 7135 if (nrank<redprocs) { 7136 lrows = size/redprocs; 7137 if (nrank<size%redprocs) lrows++; 7138 } 7139 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7140 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7141 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7142 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7143 row = nrank; 7144 ncols = xadj[1]-xadj[0]; 7145 cols = adjncy; 7146 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7147 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7148 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7149 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7150 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7151 ierr = PetscFree(xadj);CHKERRQ(ierr); 7152 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7153 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7154 ierr = PetscFree(vals);CHKERRQ(ierr); 7155 if (use_vwgt) { 7156 Vec v; 7157 const PetscScalar *array; 7158 PetscInt nl; 7159 7160 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7161 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7162 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7163 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7164 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7165 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7166 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7167 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7168 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7169 ierr = VecDestroy(&v);CHKERRQ(ierr); 7170 } 7171 } else { 7172 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7173 if (use_vwgt) { 7174 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7175 v_wgt[0] = n; 7176 } 7177 } 7178 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7179 7180 /* Partition */ 7181 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7182 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7183 if (v_wgt) { 7184 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7185 } 7186 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7187 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7188 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7189 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7190 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7191 7192 /* renumber new_ranks to avoid "holes" in new set of processors */ 7193 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7194 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7195 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7196 if (!aggregate) { 7197 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7198 #if defined(PETSC_USE_DEBUG) 7199 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7200 #endif 7201 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7202 } else if (oldranks) { 7203 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7204 } else { 7205 ranks_send_to_idx[0] = is_indices[0]; 7206 } 7207 } else { 7208 PetscInt idx = 0; 7209 PetscMPIInt tag; 7210 MPI_Request *reqs; 7211 7212 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7213 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7214 for (i=rstart;i<rend;i++) { 7215 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7216 } 7217 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7218 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7219 ierr = PetscFree(reqs);CHKERRQ(ierr); 7220 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7221 #if defined(PETSC_USE_DEBUG) 7222 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7223 #endif 7224 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7225 } else if (oldranks) { 7226 ranks_send_to_idx[0] = oldranks[idx]; 7227 } else { 7228 ranks_send_to_idx[0] = idx; 7229 } 7230 } 7231 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7232 /* clean up */ 7233 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7234 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7235 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7236 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7237 } 7238 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7239 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7240 7241 /* assemble parallel IS for sends */ 7242 i = 1; 7243 if (!color) i=0; 7244 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7245 PetscFunctionReturn(0); 7246 } 7247 7248 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7249 7250 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7251 { 7252 Mat local_mat; 7253 IS is_sends_internal; 7254 PetscInt rows,cols,new_local_rows; 7255 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7256 PetscBool ismatis,isdense,newisdense,destroy_mat; 7257 ISLocalToGlobalMapping l2gmap; 7258 PetscInt* l2gmap_indices; 7259 const PetscInt* is_indices; 7260 MatType new_local_type; 7261 /* buffers */ 7262 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7263 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7264 PetscInt *recv_buffer_idxs_local; 7265 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7266 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7267 /* MPI */ 7268 MPI_Comm comm,comm_n; 7269 PetscSubcomm subcomm; 7270 PetscMPIInt n_sends,n_recvs,commsize; 7271 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7272 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7273 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7274 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7275 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7276 PetscErrorCode ierr; 7277 7278 PetscFunctionBegin; 7279 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7280 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7281 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7282 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7283 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7284 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7285 PetscValidLogicalCollectiveBool(mat,reuse,6); 7286 PetscValidLogicalCollectiveInt(mat,nis,8); 7287 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7288 if (nvecs) { 7289 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7290 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7291 } 7292 /* further checks */ 7293 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7294 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7295 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7296 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7297 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7298 if (reuse && *mat_n) { 7299 PetscInt mrows,mcols,mnrows,mncols; 7300 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7301 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7302 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7303 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7304 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7305 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7306 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7307 } 7308 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7309 PetscValidLogicalCollectiveInt(mat,bs,0); 7310 7311 /* prepare IS for sending if not provided */ 7312 if (!is_sends) { 7313 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7314 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7315 } else { 7316 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7317 is_sends_internal = is_sends; 7318 } 7319 7320 /* get comm */ 7321 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7322 7323 /* compute number of sends */ 7324 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7325 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7326 7327 /* compute number of receives */ 7328 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7329 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7330 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7331 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7332 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7333 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7334 ierr = PetscFree(iflags);CHKERRQ(ierr); 7335 7336 /* restrict comm if requested */ 7337 subcomm = 0; 7338 destroy_mat = PETSC_FALSE; 7339 if (restrict_comm) { 7340 PetscMPIInt color,subcommsize; 7341 7342 color = 0; 7343 if (restrict_full) { 7344 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7345 } else { 7346 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7347 } 7348 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7349 subcommsize = commsize - subcommsize; 7350 /* check if reuse has been requested */ 7351 if (reuse) { 7352 if (*mat_n) { 7353 PetscMPIInt subcommsize2; 7354 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7355 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7356 comm_n = PetscObjectComm((PetscObject)*mat_n); 7357 } else { 7358 comm_n = PETSC_COMM_SELF; 7359 } 7360 } else { /* MAT_INITIAL_MATRIX */ 7361 PetscMPIInt rank; 7362 7363 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7364 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7365 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7366 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7367 comm_n = PetscSubcommChild(subcomm); 7368 } 7369 /* flag to destroy *mat_n if not significative */ 7370 if (color) destroy_mat = PETSC_TRUE; 7371 } else { 7372 comm_n = comm; 7373 } 7374 7375 /* prepare send/receive buffers */ 7376 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7377 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7378 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7379 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7380 if (nis) { 7381 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7382 } 7383 7384 /* Get data from local matrices */ 7385 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7386 /* TODO: See below some guidelines on how to prepare the local buffers */ 7387 /* 7388 send_buffer_vals should contain the raw values of the local matrix 7389 send_buffer_idxs should contain: 7390 - MatType_PRIVATE type 7391 - PetscInt size_of_l2gmap 7392 - PetscInt global_row_indices[size_of_l2gmap] 7393 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7394 */ 7395 else { 7396 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7397 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7398 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7399 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7400 send_buffer_idxs[1] = i; 7401 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7402 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7403 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7404 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7405 for (i=0;i<n_sends;i++) { 7406 ilengths_vals[is_indices[i]] = len*len; 7407 ilengths_idxs[is_indices[i]] = len+2; 7408 } 7409 } 7410 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7411 /* additional is (if any) */ 7412 if (nis) { 7413 PetscMPIInt psum; 7414 PetscInt j; 7415 for (j=0,psum=0;j<nis;j++) { 7416 PetscInt plen; 7417 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7418 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7419 psum += len+1; /* indices + lenght */ 7420 } 7421 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7422 for (j=0,psum=0;j<nis;j++) { 7423 PetscInt plen; 7424 const PetscInt *is_array_idxs; 7425 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7426 send_buffer_idxs_is[psum] = plen; 7427 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7428 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7429 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7430 psum += plen+1; /* indices + lenght */ 7431 } 7432 for (i=0;i<n_sends;i++) { 7433 ilengths_idxs_is[is_indices[i]] = psum; 7434 } 7435 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7436 } 7437 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7438 7439 buf_size_idxs = 0; 7440 buf_size_vals = 0; 7441 buf_size_idxs_is = 0; 7442 buf_size_vecs = 0; 7443 for (i=0;i<n_recvs;i++) { 7444 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7445 buf_size_vals += (PetscInt)olengths_vals[i]; 7446 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7447 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7448 } 7449 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7450 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7451 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7452 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7453 7454 /* get new tags for clean communications */ 7455 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7456 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7457 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7458 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7459 7460 /* allocate for requests */ 7461 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7462 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7463 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7464 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7465 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7466 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7467 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7468 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7469 7470 /* communications */ 7471 ptr_idxs = recv_buffer_idxs; 7472 ptr_vals = recv_buffer_vals; 7473 ptr_idxs_is = recv_buffer_idxs_is; 7474 ptr_vecs = recv_buffer_vecs; 7475 for (i=0;i<n_recvs;i++) { 7476 source_dest = onodes[i]; 7477 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7478 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7479 ptr_idxs += olengths_idxs[i]; 7480 ptr_vals += olengths_vals[i]; 7481 if (nis) { 7482 source_dest = onodes_is[i]; 7483 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7484 ptr_idxs_is += olengths_idxs_is[i]; 7485 } 7486 if (nvecs) { 7487 source_dest = onodes[i]; 7488 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7489 ptr_vecs += olengths_idxs[i]-2; 7490 } 7491 } 7492 for (i=0;i<n_sends;i++) { 7493 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7494 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7495 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7496 if (nis) { 7497 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7498 } 7499 if (nvecs) { 7500 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7501 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7502 } 7503 } 7504 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7505 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7506 7507 /* assemble new l2g map */ 7508 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7509 ptr_idxs = recv_buffer_idxs; 7510 new_local_rows = 0; 7511 for (i=0;i<n_recvs;i++) { 7512 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7513 ptr_idxs += olengths_idxs[i]; 7514 } 7515 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7516 ptr_idxs = recv_buffer_idxs; 7517 new_local_rows = 0; 7518 for (i=0;i<n_recvs;i++) { 7519 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7520 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7521 ptr_idxs += olengths_idxs[i]; 7522 } 7523 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7524 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7525 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7526 7527 /* infer new local matrix type from received local matrices type */ 7528 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7529 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7530 if (n_recvs) { 7531 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7532 ptr_idxs = recv_buffer_idxs; 7533 for (i=0;i<n_recvs;i++) { 7534 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7535 new_local_type_private = MATAIJ_PRIVATE; 7536 break; 7537 } 7538 ptr_idxs += olengths_idxs[i]; 7539 } 7540 switch (new_local_type_private) { 7541 case MATDENSE_PRIVATE: 7542 new_local_type = MATSEQAIJ; 7543 bs = 1; 7544 break; 7545 case MATAIJ_PRIVATE: 7546 new_local_type = MATSEQAIJ; 7547 bs = 1; 7548 break; 7549 case MATBAIJ_PRIVATE: 7550 new_local_type = MATSEQBAIJ; 7551 break; 7552 case MATSBAIJ_PRIVATE: 7553 new_local_type = MATSEQSBAIJ; 7554 break; 7555 default: 7556 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7557 break; 7558 } 7559 } else { /* by default, new_local_type is seqaij */ 7560 new_local_type = MATSEQAIJ; 7561 bs = 1; 7562 } 7563 7564 /* create MATIS object if needed */ 7565 if (!reuse) { 7566 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7567 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7568 } else { 7569 /* it also destroys the local matrices */ 7570 if (*mat_n) { 7571 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7572 } else { /* this is a fake object */ 7573 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7574 } 7575 } 7576 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7577 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7578 7579 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7580 7581 /* Global to local map of received indices */ 7582 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7583 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7584 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7585 7586 /* restore attributes -> type of incoming data and its size */ 7587 buf_size_idxs = 0; 7588 for (i=0;i<n_recvs;i++) { 7589 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7590 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7591 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7592 } 7593 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7594 7595 /* set preallocation */ 7596 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7597 if (!newisdense) { 7598 PetscInt *new_local_nnz=0; 7599 7600 ptr_idxs = recv_buffer_idxs_local; 7601 if (n_recvs) { 7602 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7603 } 7604 for (i=0;i<n_recvs;i++) { 7605 PetscInt j; 7606 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7607 for (j=0;j<*(ptr_idxs+1);j++) { 7608 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7609 } 7610 } else { 7611 /* TODO */ 7612 } 7613 ptr_idxs += olengths_idxs[i]; 7614 } 7615 if (new_local_nnz) { 7616 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7617 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7618 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7619 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7620 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7621 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7622 } else { 7623 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7624 } 7625 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7626 } else { 7627 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7628 } 7629 7630 /* set values */ 7631 ptr_vals = recv_buffer_vals; 7632 ptr_idxs = recv_buffer_idxs_local; 7633 for (i=0;i<n_recvs;i++) { 7634 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7635 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7636 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7637 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7638 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7639 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7640 } else { 7641 /* TODO */ 7642 } 7643 ptr_idxs += olengths_idxs[i]; 7644 ptr_vals += olengths_vals[i]; 7645 } 7646 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7647 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7648 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7649 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7650 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7651 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7652 7653 #if 0 7654 if (!restrict_comm) { /* check */ 7655 Vec lvec,rvec; 7656 PetscReal infty_error; 7657 7658 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7659 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7660 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7661 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7662 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7663 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7664 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7665 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7666 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7667 } 7668 #endif 7669 7670 /* assemble new additional is (if any) */ 7671 if (nis) { 7672 PetscInt **temp_idxs,*count_is,j,psum; 7673 7674 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7675 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7676 ptr_idxs = recv_buffer_idxs_is; 7677 psum = 0; 7678 for (i=0;i<n_recvs;i++) { 7679 for (j=0;j<nis;j++) { 7680 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7681 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7682 psum += plen; 7683 ptr_idxs += plen+1; /* shift pointer to received data */ 7684 } 7685 } 7686 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7687 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7688 for (i=1;i<nis;i++) { 7689 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7690 } 7691 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7692 ptr_idxs = recv_buffer_idxs_is; 7693 for (i=0;i<n_recvs;i++) { 7694 for (j=0;j<nis;j++) { 7695 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7696 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7697 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7698 ptr_idxs += plen+1; /* shift pointer to received data */ 7699 } 7700 } 7701 for (i=0;i<nis;i++) { 7702 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7703 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7704 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7705 } 7706 ierr = PetscFree(count_is);CHKERRQ(ierr); 7707 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7708 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7709 } 7710 /* free workspace */ 7711 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7712 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7713 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7714 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7715 if (isdense) { 7716 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7717 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7718 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7719 } else { 7720 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7721 } 7722 if (nis) { 7723 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7724 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7725 } 7726 7727 if (nvecs) { 7728 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7729 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7730 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7731 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7732 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7733 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7734 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7735 /* set values */ 7736 ptr_vals = recv_buffer_vecs; 7737 ptr_idxs = recv_buffer_idxs_local; 7738 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7739 for (i=0;i<n_recvs;i++) { 7740 PetscInt j; 7741 for (j=0;j<*(ptr_idxs+1);j++) { 7742 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7743 } 7744 ptr_idxs += olengths_idxs[i]; 7745 ptr_vals += olengths_idxs[i]-2; 7746 } 7747 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7748 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7749 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7750 } 7751 7752 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7753 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7754 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7755 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7756 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7757 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7758 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7759 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7760 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7761 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7762 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7763 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7764 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7765 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7766 ierr = PetscFree(onodes);CHKERRQ(ierr); 7767 if (nis) { 7768 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7769 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7770 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7771 } 7772 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7773 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7774 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7775 for (i=0;i<nis;i++) { 7776 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7777 } 7778 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7779 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7780 } 7781 *mat_n = NULL; 7782 } 7783 PetscFunctionReturn(0); 7784 } 7785 7786 /* temporary hack into ksp private data structure */ 7787 #include <petsc/private/kspimpl.h> 7788 7789 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7790 { 7791 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7792 PC_IS *pcis = (PC_IS*)pc->data; 7793 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7794 Mat coarsedivudotp = NULL; 7795 Mat coarseG,t_coarse_mat_is; 7796 MatNullSpace CoarseNullSpace = NULL; 7797 ISLocalToGlobalMapping coarse_islg; 7798 IS coarse_is,*isarray; 7799 PetscInt i,im_active=-1,active_procs=-1; 7800 PetscInt nis,nisdofs,nisneu,nisvert; 7801 PC pc_temp; 7802 PCType coarse_pc_type; 7803 KSPType coarse_ksp_type; 7804 PetscBool multilevel_requested,multilevel_allowed; 7805 PetscBool coarse_reuse; 7806 PetscInt ncoarse,nedcfield; 7807 PetscBool compute_vecs = PETSC_FALSE; 7808 PetscScalar *array; 7809 MatReuse coarse_mat_reuse; 7810 PetscBool restr, full_restr, have_void; 7811 PetscMPIInt commsize; 7812 PetscErrorCode ierr; 7813 7814 PetscFunctionBegin; 7815 /* Assign global numbering to coarse dofs */ 7816 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7817 PetscInt ocoarse_size; 7818 compute_vecs = PETSC_TRUE; 7819 7820 pcbddc->new_primal_space = PETSC_TRUE; 7821 ocoarse_size = pcbddc->coarse_size; 7822 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7823 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7824 /* see if we can avoid some work */ 7825 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7826 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7827 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7828 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7829 coarse_reuse = PETSC_FALSE; 7830 } else { /* we can safely reuse already computed coarse matrix */ 7831 coarse_reuse = PETSC_TRUE; 7832 } 7833 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7834 coarse_reuse = PETSC_FALSE; 7835 } 7836 /* reset any subassembling information */ 7837 if (!coarse_reuse || pcbddc->recompute_topography) { 7838 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7839 } 7840 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7841 coarse_reuse = PETSC_TRUE; 7842 } 7843 /* assemble coarse matrix */ 7844 if (coarse_reuse && pcbddc->coarse_ksp) { 7845 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7846 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7847 coarse_mat_reuse = MAT_REUSE_MATRIX; 7848 } else { 7849 coarse_mat = NULL; 7850 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7851 } 7852 7853 /* creates temporary l2gmap and IS for coarse indexes */ 7854 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7855 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7856 7857 /* creates temporary MATIS object for coarse matrix */ 7858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7859 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7860 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7861 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7862 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7863 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7864 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7865 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7866 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7867 7868 /* count "active" (i.e. with positive local size) and "void" processes */ 7869 im_active = !!(pcis->n); 7870 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7871 7872 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7873 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7874 /* full_restr : just use the receivers from the subassembling pattern */ 7875 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7876 coarse_mat_is = NULL; 7877 multilevel_allowed = PETSC_FALSE; 7878 multilevel_requested = PETSC_FALSE; 7879 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7880 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7881 if (multilevel_requested) { 7882 ncoarse = active_procs/pcbddc->coarsening_ratio; 7883 restr = PETSC_FALSE; 7884 full_restr = PETSC_FALSE; 7885 } else { 7886 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7887 restr = PETSC_TRUE; 7888 full_restr = PETSC_TRUE; 7889 } 7890 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7891 ncoarse = PetscMax(1,ncoarse); 7892 if (!pcbddc->coarse_subassembling) { 7893 if (pcbddc->coarsening_ratio > 1) { 7894 if (multilevel_requested) { 7895 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7896 } else { 7897 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7898 } 7899 } else { 7900 PetscMPIInt rank; 7901 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7902 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7903 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7904 } 7905 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7906 PetscInt psum; 7907 if (pcbddc->coarse_ksp) psum = 1; 7908 else psum = 0; 7909 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7910 if (ncoarse < commsize) have_void = PETSC_TRUE; 7911 } 7912 /* determine if we can go multilevel */ 7913 if (multilevel_requested) { 7914 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7915 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7916 } 7917 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7918 7919 /* dump subassembling pattern */ 7920 if (pcbddc->dbg_flag && multilevel_allowed) { 7921 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7922 } 7923 7924 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7925 nedcfield = -1; 7926 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7927 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7928 const PetscInt *idxs; 7929 ISLocalToGlobalMapping tmap; 7930 7931 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7932 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7933 /* allocate space for temporary storage */ 7934 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7935 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7936 /* allocate for IS array */ 7937 nisdofs = pcbddc->n_ISForDofsLocal; 7938 if (pcbddc->nedclocal) { 7939 if (pcbddc->nedfield > -1) { 7940 nedcfield = pcbddc->nedfield; 7941 } else { 7942 nedcfield = 0; 7943 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7944 nisdofs = 1; 7945 } 7946 } 7947 nisneu = !!pcbddc->NeumannBoundariesLocal; 7948 nisvert = 0; /* nisvert is not used */ 7949 nis = nisdofs + nisneu + nisvert; 7950 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7951 /* dofs splitting */ 7952 for (i=0;i<nisdofs;i++) { 7953 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7954 if (nedcfield != i) { 7955 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7956 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7957 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7958 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7959 } else { 7960 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7961 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7962 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7963 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7964 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7965 } 7966 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7967 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7968 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7969 } 7970 /* neumann boundaries */ 7971 if (pcbddc->NeumannBoundariesLocal) { 7972 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7973 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7974 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7975 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7976 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7977 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7978 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7979 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7980 } 7981 /* free memory */ 7982 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7983 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7984 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7985 } else { 7986 nis = 0; 7987 nisdofs = 0; 7988 nisneu = 0; 7989 nisvert = 0; 7990 isarray = NULL; 7991 } 7992 /* destroy no longer needed map */ 7993 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7994 7995 /* subassemble */ 7996 if (multilevel_allowed) { 7997 Vec vp[1]; 7998 PetscInt nvecs = 0; 7999 PetscBool reuse,reuser; 8000 8001 if (coarse_mat) reuse = PETSC_TRUE; 8002 else reuse = PETSC_FALSE; 8003 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8004 vp[0] = NULL; 8005 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8006 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8007 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8008 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8009 nvecs = 1; 8010 8011 if (pcbddc->divudotp) { 8012 Mat B,loc_divudotp; 8013 Vec v,p; 8014 IS dummy; 8015 PetscInt np; 8016 8017 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8018 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8019 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8020 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8021 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8022 ierr = VecSet(p,1.);CHKERRQ(ierr); 8023 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8024 ierr = VecDestroy(&p);CHKERRQ(ierr); 8025 ierr = MatDestroy(&B);CHKERRQ(ierr); 8026 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8027 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8028 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8029 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8030 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8031 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8032 ierr = VecDestroy(&v);CHKERRQ(ierr); 8033 } 8034 } 8035 if (reuser) { 8036 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8037 } else { 8038 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8039 } 8040 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8041 PetscScalar *arraym,*arrayv; 8042 PetscInt nl; 8043 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8044 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8045 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8046 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8047 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8048 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8049 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8050 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8051 } else { 8052 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8053 } 8054 } else { 8055 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8056 } 8057 if (coarse_mat_is || coarse_mat) { 8058 PetscMPIInt size; 8059 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8060 if (!multilevel_allowed) { 8061 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8062 } else { 8063 Mat A; 8064 8065 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8066 if (coarse_mat_is) { 8067 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8068 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8069 coarse_mat = coarse_mat_is; 8070 } 8071 /* be sure we don't have MatSeqDENSE as local mat */ 8072 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8073 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8074 } 8075 } 8076 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8077 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8078 8079 /* create local to global scatters for coarse problem */ 8080 if (compute_vecs) { 8081 PetscInt lrows; 8082 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8083 if (coarse_mat) { 8084 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8085 } else { 8086 lrows = 0; 8087 } 8088 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8089 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8090 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8091 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8092 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8093 } 8094 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8095 8096 /* set defaults for coarse KSP and PC */ 8097 if (multilevel_allowed) { 8098 coarse_ksp_type = KSPRICHARDSON; 8099 coarse_pc_type = PCBDDC; 8100 } else { 8101 coarse_ksp_type = KSPPREONLY; 8102 coarse_pc_type = PCREDUNDANT; 8103 } 8104 8105 /* print some info if requested */ 8106 if (pcbddc->dbg_flag) { 8107 if (!multilevel_allowed) { 8108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8109 if (multilevel_requested) { 8110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8111 } else if (pcbddc->max_levels) { 8112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8113 } 8114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8115 } 8116 } 8117 8118 /* communicate coarse discrete gradient */ 8119 coarseG = NULL; 8120 if (pcbddc->nedcG && multilevel_allowed) { 8121 MPI_Comm ccomm; 8122 if (coarse_mat) { 8123 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8124 } else { 8125 ccomm = MPI_COMM_NULL; 8126 } 8127 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8128 } 8129 8130 /* create the coarse KSP object only once with defaults */ 8131 if (coarse_mat) { 8132 PetscBool isredundant,isnn,isbddc; 8133 PetscViewer dbg_viewer = NULL; 8134 8135 if (pcbddc->dbg_flag) { 8136 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8137 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8138 } 8139 if (!pcbddc->coarse_ksp) { 8140 char prefix[256],str_level[16]; 8141 size_t len; 8142 8143 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8144 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8145 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8146 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8147 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8148 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8149 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8150 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8151 /* TODO is this logic correct? should check for coarse_mat type */ 8152 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8153 /* prefix */ 8154 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8155 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8156 if (!pcbddc->current_level) { 8157 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 8158 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 8159 } else { 8160 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8161 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8162 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8163 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8164 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8165 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 8166 } 8167 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8168 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8169 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8170 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8171 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8172 /* allow user customization */ 8173 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8174 } 8175 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8176 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8177 if (nisdofs) { 8178 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8179 for (i=0;i<nisdofs;i++) { 8180 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8181 } 8182 } 8183 if (nisneu) { 8184 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8185 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8186 } 8187 if (nisvert) { 8188 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8189 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8190 } 8191 if (coarseG) { 8192 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8193 } 8194 8195 /* get some info after set from options */ 8196 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8197 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8198 if (isbddc && !multilevel_allowed) { 8199 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8200 isbddc = PETSC_FALSE; 8201 } 8202 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8203 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8204 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8205 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8206 isbddc = PETSC_TRUE; 8207 } 8208 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8209 if (isredundant) { 8210 KSP inner_ksp; 8211 PC inner_pc; 8212 8213 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8214 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8215 } 8216 8217 /* parameters which miss an API */ 8218 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8219 if (isbddc) { 8220 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8221 8222 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8223 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8224 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8225 if (pcbddc_coarse->benign_saddle_point) { 8226 Mat coarsedivudotp_is; 8227 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8228 IS row,col; 8229 const PetscInt *gidxs; 8230 PetscInt n,st,M,N; 8231 8232 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8233 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8234 st = st-n; 8235 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8236 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8237 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8238 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8239 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8240 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8241 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8242 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8243 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8244 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8245 ierr = ISDestroy(&row);CHKERRQ(ierr); 8246 ierr = ISDestroy(&col);CHKERRQ(ierr); 8247 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8248 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8249 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8250 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8251 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8252 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8253 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8254 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8255 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8256 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8257 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8258 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8259 } 8260 } 8261 8262 /* propagate symmetry info of coarse matrix */ 8263 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8264 if (pc->pmat->symmetric_set) { 8265 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8266 } 8267 if (pc->pmat->hermitian_set) { 8268 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8269 } 8270 if (pc->pmat->spd_set) { 8271 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8272 } 8273 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8274 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8275 } 8276 /* set operators */ 8277 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8278 if (pcbddc->dbg_flag) { 8279 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8280 } 8281 } 8282 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8283 ierr = PetscFree(isarray);CHKERRQ(ierr); 8284 #if 0 8285 { 8286 PetscViewer viewer; 8287 char filename[256]; 8288 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8289 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8290 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8291 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8292 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8293 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8294 } 8295 #endif 8296 8297 if (pcbddc->coarse_ksp) { 8298 Vec crhs,csol; 8299 8300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8301 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8302 if (!csol) { 8303 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8304 } 8305 if (!crhs) { 8306 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8307 } 8308 } 8309 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8310 8311 /* compute null space for coarse solver if the benign trick has been requested */ 8312 if (pcbddc->benign_null) { 8313 8314 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8315 for (i=0;i<pcbddc->benign_n;i++) { 8316 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8317 } 8318 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8319 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8320 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8321 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8322 if (coarse_mat) { 8323 Vec nullv; 8324 PetscScalar *array,*array2; 8325 PetscInt nl; 8326 8327 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8328 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8329 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8330 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8331 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8332 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8333 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8334 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8335 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8336 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8337 } 8338 } 8339 8340 if (pcbddc->coarse_ksp) { 8341 PetscBool ispreonly; 8342 8343 if (CoarseNullSpace) { 8344 PetscBool isnull; 8345 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8346 if (isnull) { 8347 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8348 } 8349 /* TODO: add local nullspaces (if any) */ 8350 } 8351 /* setup coarse ksp */ 8352 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8353 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8354 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8355 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8356 KSP check_ksp; 8357 KSPType check_ksp_type; 8358 PC check_pc; 8359 Vec check_vec,coarse_vec; 8360 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8361 PetscInt its; 8362 PetscBool compute_eigs; 8363 PetscReal *eigs_r,*eigs_c; 8364 PetscInt neigs; 8365 const char *prefix; 8366 8367 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8368 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8369 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8370 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8371 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8372 /* prevent from setup unneeded object */ 8373 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8374 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8375 if (ispreonly) { 8376 check_ksp_type = KSPPREONLY; 8377 compute_eigs = PETSC_FALSE; 8378 } else { 8379 check_ksp_type = KSPGMRES; 8380 compute_eigs = PETSC_TRUE; 8381 } 8382 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8383 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8384 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8385 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8386 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8387 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8388 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8389 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8390 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8391 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8392 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8393 /* create random vec */ 8394 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8395 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8396 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8397 /* solve coarse problem */ 8398 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8399 /* set eigenvalue estimation if preonly has not been requested */ 8400 if (compute_eigs) { 8401 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8402 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8403 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8404 if (neigs) { 8405 lambda_max = eigs_r[neigs-1]; 8406 lambda_min = eigs_r[0]; 8407 if (pcbddc->use_coarse_estimates) { 8408 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8409 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8410 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8411 } 8412 } 8413 } 8414 } 8415 8416 /* check coarse problem residual error */ 8417 if (pcbddc->dbg_flag) { 8418 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8419 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8420 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8421 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8422 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8423 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8424 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8425 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8426 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8427 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8428 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8429 if (CoarseNullSpace) { 8430 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8431 } 8432 if (compute_eigs) { 8433 PetscReal lambda_max_s,lambda_min_s; 8434 KSPConvergedReason reason; 8435 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8436 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8437 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8438 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8439 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8440 for (i=0;i<neigs;i++) { 8441 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8442 } 8443 } 8444 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8445 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8446 } 8447 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8448 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8449 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8450 if (compute_eigs) { 8451 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8452 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8453 } 8454 } 8455 } 8456 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8457 /* print additional info */ 8458 if (pcbddc->dbg_flag) { 8459 /* waits until all processes reaches this point */ 8460 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8463 } 8464 8465 /* free memory */ 8466 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8467 PetscFunctionReturn(0); 8468 } 8469 8470 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8471 { 8472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8473 PC_IS* pcis = (PC_IS*)pc->data; 8474 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8475 IS subset,subset_mult,subset_n; 8476 PetscInt local_size,coarse_size=0; 8477 PetscInt *local_primal_indices=NULL; 8478 const PetscInt *t_local_primal_indices; 8479 PetscErrorCode ierr; 8480 8481 PetscFunctionBegin; 8482 /* Compute global number of coarse dofs */ 8483 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8484 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8485 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8486 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8487 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8488 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8489 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8490 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8491 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8492 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8493 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8494 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8495 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8496 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8497 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8498 8499 /* check numbering */ 8500 if (pcbddc->dbg_flag) { 8501 PetscScalar coarsesum,*array,*array2; 8502 PetscInt i; 8503 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8504 8505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8506 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8507 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8508 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8509 /* counter */ 8510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8511 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8512 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8513 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8514 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8515 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8516 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8517 for (i=0;i<pcbddc->local_primal_size;i++) { 8518 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8519 } 8520 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8521 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8522 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8523 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8524 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8525 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8526 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8527 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8528 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8529 for (i=0;i<pcis->n;i++) { 8530 if (array[i] != 0.0 && array[i] != array2[i]) { 8531 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8532 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8533 set_error = PETSC_TRUE; 8534 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8536 } 8537 } 8538 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8539 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8541 for (i=0;i<pcis->n;i++) { 8542 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8543 } 8544 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8546 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8547 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8548 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8550 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8551 PetscInt *gidxs; 8552 8553 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8554 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8555 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8556 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8558 for (i=0;i<pcbddc->local_primal_size;i++) { 8559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8560 } 8561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8562 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8563 } 8564 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8565 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8566 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8567 } 8568 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8569 /* get back data */ 8570 *coarse_size_n = coarse_size; 8571 *local_primal_indices_n = local_primal_indices; 8572 PetscFunctionReturn(0); 8573 } 8574 8575 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8576 { 8577 IS localis_t; 8578 PetscInt i,lsize,*idxs,n; 8579 PetscScalar *vals; 8580 PetscErrorCode ierr; 8581 8582 PetscFunctionBegin; 8583 /* get indices in local ordering exploiting local to global map */ 8584 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8585 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8586 for (i=0;i<lsize;i++) vals[i] = 1.0; 8587 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8588 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8589 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8590 if (idxs) { /* multilevel guard */ 8591 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8592 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8593 } 8594 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8595 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8596 ierr = PetscFree(vals);CHKERRQ(ierr); 8597 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8598 /* now compute set in local ordering */ 8599 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8600 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8601 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8602 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8603 for (i=0,lsize=0;i<n;i++) { 8604 if (PetscRealPart(vals[i]) > 0.5) { 8605 lsize++; 8606 } 8607 } 8608 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8609 for (i=0,lsize=0;i<n;i++) { 8610 if (PetscRealPart(vals[i]) > 0.5) { 8611 idxs[lsize++] = i; 8612 } 8613 } 8614 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8615 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8616 *localis = localis_t; 8617 PetscFunctionReturn(0); 8618 } 8619 8620 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8621 { 8622 PC_IS *pcis=(PC_IS*)pc->data; 8623 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8624 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8625 Mat S_j; 8626 PetscInt *used_xadj,*used_adjncy; 8627 PetscBool free_used_adj; 8628 PetscErrorCode ierr; 8629 8630 PetscFunctionBegin; 8631 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8632 free_used_adj = PETSC_FALSE; 8633 if (pcbddc->sub_schurs_layers == -1) { 8634 used_xadj = NULL; 8635 used_adjncy = NULL; 8636 } else { 8637 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8638 used_xadj = pcbddc->mat_graph->xadj; 8639 used_adjncy = pcbddc->mat_graph->adjncy; 8640 } else if (pcbddc->computed_rowadj) { 8641 used_xadj = pcbddc->mat_graph->xadj; 8642 used_adjncy = pcbddc->mat_graph->adjncy; 8643 } else { 8644 PetscBool flg_row=PETSC_FALSE; 8645 const PetscInt *xadj,*adjncy; 8646 PetscInt nvtxs; 8647 8648 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8649 if (flg_row) { 8650 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8651 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8652 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8653 free_used_adj = PETSC_TRUE; 8654 } else { 8655 pcbddc->sub_schurs_layers = -1; 8656 used_xadj = NULL; 8657 used_adjncy = NULL; 8658 } 8659 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8660 } 8661 } 8662 8663 /* setup sub_schurs data */ 8664 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8665 if (!sub_schurs->schur_explicit) { 8666 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8667 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8668 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8669 } else { 8670 Mat change = NULL; 8671 Vec scaling = NULL; 8672 IS change_primal = NULL, iP; 8673 PetscInt benign_n; 8674 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8675 PetscBool isseqaij,need_change = PETSC_FALSE; 8676 PetscBool discrete_harmonic = PETSC_FALSE; 8677 8678 if (!pcbddc->use_vertices && reuse_solvers) { 8679 PetscInt n_vertices; 8680 8681 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8682 reuse_solvers = (PetscBool)!n_vertices; 8683 } 8684 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8685 if (!isseqaij) { 8686 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8687 if (matis->A == pcbddc->local_mat) { 8688 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8689 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8690 } else { 8691 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8692 } 8693 } 8694 if (!pcbddc->benign_change_explicit) { 8695 benign_n = pcbddc->benign_n; 8696 } else { 8697 benign_n = 0; 8698 } 8699 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8700 We need a global reduction to avoid possible deadlocks. 8701 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8702 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8703 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8704 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8705 need_change = (PetscBool)(!need_change); 8706 } 8707 /* If the user defines additional constraints, we import them here. 8708 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8709 if (need_change) { 8710 PC_IS *pcisf; 8711 PC_BDDC *pcbddcf; 8712 PC pcf; 8713 8714 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8715 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8716 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8717 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8718 8719 /* hacks */ 8720 pcisf = (PC_IS*)pcf->data; 8721 pcisf->is_B_local = pcis->is_B_local; 8722 pcisf->vec1_N = pcis->vec1_N; 8723 pcisf->BtoNmap = pcis->BtoNmap; 8724 pcisf->n = pcis->n; 8725 pcisf->n_B = pcis->n_B; 8726 pcbddcf = (PC_BDDC*)pcf->data; 8727 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8728 pcbddcf->mat_graph = pcbddc->mat_graph; 8729 pcbddcf->use_faces = PETSC_TRUE; 8730 pcbddcf->use_change_of_basis = PETSC_TRUE; 8731 pcbddcf->use_change_on_faces = PETSC_TRUE; 8732 pcbddcf->use_qr_single = PETSC_TRUE; 8733 pcbddcf->fake_change = PETSC_TRUE; 8734 8735 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8736 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8737 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8738 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8739 change = pcbddcf->ConstraintMatrix; 8740 pcbddcf->ConstraintMatrix = NULL; 8741 8742 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8743 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8744 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8745 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8746 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8747 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8748 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8749 pcf->ops->destroy = NULL; 8750 pcf->ops->reset = NULL; 8751 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8752 } 8753 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8754 8755 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8756 if (iP) { 8757 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8758 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8759 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8760 } 8761 if (discrete_harmonic) { 8762 Mat A; 8763 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8764 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8765 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8766 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8767 ierr = MatDestroy(&A);CHKERRQ(ierr); 8768 } else { 8769 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8770 } 8771 ierr = MatDestroy(&change);CHKERRQ(ierr); 8772 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8773 } 8774 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8775 8776 /* free adjacency */ 8777 if (free_used_adj) { 8778 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8779 } 8780 PetscFunctionReturn(0); 8781 } 8782 8783 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8784 { 8785 PC_IS *pcis=(PC_IS*)pc->data; 8786 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8787 PCBDDCGraph graph; 8788 PetscErrorCode ierr; 8789 8790 PetscFunctionBegin; 8791 /* attach interface graph for determining subsets */ 8792 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8793 IS verticesIS,verticescomm; 8794 PetscInt vsize,*idxs; 8795 8796 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8797 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8798 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8799 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8800 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8801 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8802 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8803 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8804 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8805 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8806 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8807 } else { 8808 graph = pcbddc->mat_graph; 8809 } 8810 /* print some info */ 8811 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8812 IS vertices; 8813 PetscInt nv,nedges,nfaces; 8814 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8815 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8816 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8817 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8823 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8824 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8825 } 8826 8827 /* sub_schurs init */ 8828 if (!pcbddc->sub_schurs) { 8829 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8830 } 8831 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8832 8833 /* free graph struct */ 8834 if (pcbddc->sub_schurs_rebuild) { 8835 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8836 } 8837 PetscFunctionReturn(0); 8838 } 8839 8840 PetscErrorCode PCBDDCCheckOperator(PC pc) 8841 { 8842 PC_IS *pcis=(PC_IS*)pc->data; 8843 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8844 PetscErrorCode ierr; 8845 8846 PetscFunctionBegin; 8847 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8848 IS zerodiag = NULL; 8849 Mat S_j,B0_B=NULL; 8850 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8851 PetscScalar *p0_check,*array,*array2; 8852 PetscReal norm; 8853 PetscInt i; 8854 8855 /* B0 and B0_B */ 8856 if (zerodiag) { 8857 IS dummy; 8858 8859 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8860 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8861 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8862 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8863 } 8864 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8865 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8866 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8867 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8868 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8869 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8870 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8871 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8872 /* S_j */ 8873 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8874 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8875 8876 /* mimic vector in \widetilde{W}_\Gamma */ 8877 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8878 /* continuous in primal space */ 8879 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8880 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8881 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8882 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8883 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8884 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8885 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8886 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8887 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8888 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8889 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8890 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8891 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8892 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8893 8894 /* assemble rhs for coarse problem */ 8895 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8896 /* local with Schur */ 8897 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8898 if (zerodiag) { 8899 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8900 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8901 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8902 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8903 } 8904 /* sum on primal nodes the local contributions */ 8905 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8906 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8907 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8908 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8909 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8910 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8911 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8912 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8913 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8915 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8916 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8917 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8918 /* scale primal nodes (BDDC sums contibutions) */ 8919 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8920 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8921 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8922 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8923 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8924 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8925 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8926 /* global: \widetilde{B0}_B w_\Gamma */ 8927 if (zerodiag) { 8928 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8929 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8930 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8931 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8932 } 8933 /* BDDC */ 8934 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8935 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8936 8937 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8938 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8939 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8940 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8941 for (i=0;i<pcbddc->benign_n;i++) { 8942 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8943 } 8944 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8945 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8946 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8947 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8948 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8949 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8950 } 8951 PetscFunctionReturn(0); 8952 } 8953 8954 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8955 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8956 { 8957 Mat At; 8958 IS rows; 8959 PetscInt rst,ren; 8960 PetscErrorCode ierr; 8961 PetscLayout rmap; 8962 8963 PetscFunctionBegin; 8964 rst = ren = 0; 8965 if (ccomm != MPI_COMM_NULL) { 8966 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8967 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8968 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8969 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8970 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8971 } 8972 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8973 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8974 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8975 8976 if (ccomm != MPI_COMM_NULL) { 8977 Mat_MPIAIJ *a,*b; 8978 IS from,to; 8979 Vec gvec; 8980 PetscInt lsize; 8981 8982 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8983 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8984 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8985 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8986 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8987 a = (Mat_MPIAIJ*)At->data; 8988 b = (Mat_MPIAIJ*)(*B)->data; 8989 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8990 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8991 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8992 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8993 b->A = a->A; 8994 b->B = a->B; 8995 8996 b->donotstash = a->donotstash; 8997 b->roworiented = a->roworiented; 8998 b->rowindices = 0; 8999 b->rowvalues = 0; 9000 b->getrowactive = PETSC_FALSE; 9001 9002 (*B)->rmap = rmap; 9003 (*B)->factortype = A->factortype; 9004 (*B)->assembled = PETSC_TRUE; 9005 (*B)->insertmode = NOT_SET_VALUES; 9006 (*B)->preallocated = PETSC_TRUE; 9007 9008 if (a->colmap) { 9009 #if defined(PETSC_USE_CTABLE) 9010 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9011 #else 9012 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9013 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9014 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9015 #endif 9016 } else b->colmap = 0; 9017 if (a->garray) { 9018 PetscInt len; 9019 len = a->B->cmap->n; 9020 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9021 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9022 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9023 } else b->garray = 0; 9024 9025 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9026 b->lvec = a->lvec; 9027 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9028 9029 /* cannot use VecScatterCopy */ 9030 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9031 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9032 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9033 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9034 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9035 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9036 ierr = ISDestroy(&from);CHKERRQ(ierr); 9037 ierr = ISDestroy(&to);CHKERRQ(ierr); 9038 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9039 } 9040 ierr = MatDestroy(&At);CHKERRQ(ierr); 9041 PetscFunctionReturn(0); 9042 } 9043